Commit e66284e

added feature
1 parent 11a01f8 commit e66284e

File tree

3 files changed: +334 -0 lines changed


SLAM/VisualSLAM/README.md

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
# Visual SLAM Module

This module implements Visual SLAM (Simultaneous Localization and Mapping) with support for monocular, stereo, and RGB-D camera inputs. It combines visual odometry and mapping in real time to track camera motion and build a 3D map of the environment.

## Features

- Support for multiple camera types:
  - Monocular camera
  - Stereo camera
  - RGB-D camera
- Feature detection and tracking using optical flow
- Visual odometry using essential-matrix or homography decomposition
- 3D mapping using triangulation
- Real-time trajectory visualization
- Support for keyframe selection
- 3D point cloud visualization
## Requirements

Install the required dependencies:

```bash
pip install -r requirements.txt
```
## Usage

Basic usage example:

```python
from visual_slam import CameraParams, VisualSLAM

# Initialize camera parameters (intrinsics in pixels)
camera_params = CameraParams(
    fx=525.0,       # focal length x
    fy=525.0,       # focal length y
    cx=320.0,       # principal point x
    cy=240.0,       # principal point y
    baseline=0.075  # stereo baseline (unused for monocular input)
)

# Create SLAM instance
slam = VisualSLAM(camera_params)

# Process a frame (a BGR image, e.g. read via cv2.VideoCapture)
pose, map_points = slam.process_frame(frame)

# Visualize the trajectory and map
slam.visualize()
```
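For prerecorded footage, the same API drives a simple frame loop. A minimal sketch, assuming a video file on disk (the name `sequence.mp4` is a placeholder):

```python
import cv2
from visual_slam import CameraParams, VisualSLAM

camera_params = CameraParams(fx=525.0, fy=525.0, cx=320.0, cy=240.0)
slam = VisualSLAM(camera_params)

cap = cv2.VideoCapture("sequence.mp4")  # placeholder path; use your own recording
while True:
    ret, frame = cap.read()
    if not ret:  # end of the video
        break
    pose, map_points = slam.process_frame(frame)

cap.release()
slam.visualize()  # plot the accumulated trajectory and sparse map
```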
## Demo

Run the included demo script to test Visual SLAM with your camera:

```bash
python visual_slam.py
```
## Components

1. **FeatureTracker**: Handles feature detection and tracking between frames
2. **VisualOdometry**: Estimates camera motion using epipolar geometry
3. **Mapping**: Performs 3D reconstruction using triangulation
4. **VisualSLAM**: Main class that integrates all components
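For reference, the components can also be driven individually. A minimal sketch, assuming two consecutive frames on disk (the file names `frame_000.png` and `frame_001.png` are placeholders):

```python
import cv2
import numpy as np
from visual_slam import CameraParams, FeatureTracker, VisualOdometry, Mapping

params = CameraParams(fx=525.0, fy=525.0, cx=320.0, cy=240.0)
tracker = FeatureTracker()
odometry = VisualOdometry(params)
mapping = Mapping(params)

frame1 = cv2.imread("frame_000.png")  # placeholder file names
frame2 = cv2.imread("frame_001.png")

# Detect corners in the first frame, then track them into the second
features = tracker.detect_features(frame1)
prev_pts, curr_pts = tracker.track_features(frame1, frame2, features)

# The essential matrix needs at least 8 correspondences
if prev_pts is not None and len(prev_pts) >= 8:
    pose = odometry.estimate_motion(prev_pts, curr_pts)
    points_3d = mapping.triangulate_points(prev_pts, curr_pts, np.eye(4), pose)
```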
## Notes

- Adjust the camera parameters to match your specific camera; a calibration sketch follows below
- For best results, ensure good lighting and well-textured scenes
- The system performs best with slow, smooth camera motion
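If you do not know your camera's intrinsics, a standard chessboard calibration with OpenCV recovers them. A minimal sketch, assuming a 9x6 inner-corner board and images matching `calib_*.png` (both are assumptions; adjust to your setup):

```python
import glob
import cv2
import numpy as np

pattern = (9, 6)  # inner corners of the chessboard (assumption)
objp = np.zeros((pattern[0] * pattern[1], 3), np.float32)
objp[:, :2] = np.mgrid[0:pattern[0], 0:pattern[1]].T.reshape(-1, 2)

obj_points, img_points = [], []
for path in glob.glob("calib_*.png"):  # placeholder file pattern
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    found, corners = cv2.findChessboardCorners(gray, pattern)
    if found:
        obj_points.append(objp)
        img_points.append(corners)

# The intrinsic matrix K holds fx, fy, cx, cy
ret, K, dist, rvecs, tvecs = cv2.calibrateCamera(
    obj_points, img_points, gray.shape[::-1], None, None
)
print("fx, fy, cx, cy =", K[0, 0], K[1, 1], K[0, 2], K[1, 2])
```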

SLAM/VisualSLAM/requirements.txt

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
numpy>=1.18.0
opencv-python>=4.5.0
matplotlib>=3.3.0
scipy>=1.6.0

SLAM/VisualSLAM/visual_slam.py

Lines changed: 259 additions & 0 deletions
@@ -0,0 +1,259 @@
"""
Visual SLAM Implementation

This module implements Visual SLAM (Simultaneous Localization and Mapping)
supporting monocular, stereo, and RGB-D camera inputs.
"""

import numpy as np
import cv2
from dataclasses import dataclass
from typing import List, Tuple, Optional
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # registers the 3D projection on older Matplotlib


@dataclass
class CameraParams:
    """Camera parameters for calibration"""
    fx: float  # Focal length x
    fy: float  # Focal length y
    cx: float  # Principal point x
    cy: float  # Principal point y
    baseline: float = 0.0  # Baseline for stereo setup (meters)

    @property
    def K(self) -> np.ndarray:
        """3x3 intrinsic matrix built from the parameters above"""
        return np.array([
            [self.fx, 0, self.cx],
            [0, self.fy, self.cy],
            [0, 0, 1]
        ])


class FeatureTracker:
    """Track features across frames using optical flow"""

    def __init__(self, max_corners: int = 1000,
                 quality_level: float = 0.01,
                 min_distance: float = 7):
        self.max_corners = max_corners
        self.quality_level = quality_level
        self.min_distance = min_distance
        # Shi-Tomasi corner detection parameters
        self.feature_params = dict(
            maxCorners=max_corners,
            qualityLevel=quality_level,
            minDistance=min_distance,
            blockSize=7
        )
        # Lucas-Kanade pyramidal optical flow parameters
        self.lk_params = dict(
            winSize=(21, 21),
            maxLevel=3,
            criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 30, 0.01)
        )

    def detect_features(self, frame: np.ndarray) -> np.ndarray:
        """Detect Shi-Tomasi corners in the frame"""
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) if len(frame.shape) == 3 else frame
        features = cv2.goodFeaturesToTrack(gray, mask=None, **self.feature_params)
        return features

    def track_features(self, prev_frame: np.ndarray, curr_frame: np.ndarray,
                       prev_features: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
        """Track features between consecutive frames"""
        if prev_features is None or len(prev_features) < 1:
            return None, None

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY) if len(prev_frame.shape) == 3 else prev_frame
        curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY) if len(curr_frame.shape) == 3 else curr_frame

        curr_features, status, _ = cv2.calcOpticalFlowPyrLK(
            prev_gray, curr_gray, prev_features, None, **self.lk_params
        )

        # Keep only the points where tracking succeeded (status == 1)
        good_curr = curr_features[status == 1]
        good_prev = prev_features[status == 1]

        return good_prev, good_curr


class VisualOdometry:
    """Estimate camera motion between frames"""

    def __init__(self, camera_params: CameraParams):
        self.camera_params = camera_params
        self.K = camera_params.K  # 3x3 intrinsic matrix
        self.pose = np.eye(4)  # Current camera pose (4x4 homogeneous transform)
        self.trajectory = [self.pose[:3, 3]]  # Camera positions over time

    def estimate_motion(self, prev_points: np.ndarray, curr_points: np.ndarray,
                        method: str = "essential") -> np.ndarray:
        """Estimate camera motion between frames using epipolar geometry.

        Note: with a monocular camera the translation is recovered only up
        to an unknown scale factor.
        """
        if method == "essential":
            E, mask = cv2.findEssentialMat(
                curr_points, prev_points, self.K,
                method=cv2.RANSAC, prob=0.999, threshold=1.0
            )
            _, R, t, _ = cv2.recoverPose(E, curr_points, prev_points, self.K, mask=mask)
        else:  # Use homography for planar scenes
            H, mask = cv2.findHomography(
                prev_points, curr_points, cv2.RANSAC, 5.0
            )
            # Decompose the homography into candidate rotations/translations
            _, Rs, ts, _ = cv2.decomposeHomographyMat(H, self.K)
            # Use the first solution; a full system would disambiguate by
            # checking which candidate places the points in front of the camera
            R, t = Rs[0], ts[0]

        # Accumulate the relative motion into the global pose
        T = np.eye(4)
        T[:3, :3] = R
        T[:3, 3] = t.ravel()
        self.pose = self.pose @ T
        self.trajectory.append(self.pose[:3, 3])

        return self.pose


class Mapping:
    """3D mapping using triangulation"""

    def __init__(self, camera_params: CameraParams):
        self.camera_params = camera_params
        self.points_3d = []
        self.colors = []

    def triangulate_points(self, points1: np.ndarray, points2: np.ndarray,
                           pose1: np.ndarray, pose2: np.ndarray) -> np.ndarray:
        """Triangulate 3D points from corresponding image points and the
        4x4 camera poses (interpreted as world-to-camera extrinsics)"""
        P1 = self.camera_params.K @ pose1[:3]  # 3x4 projection matrix for first camera
        P2 = self.camera_params.K @ pose2[:3]  # 3x4 projection matrix for second camera

        # Triangulate in homogeneous coordinates, then dehomogenize
        points_4d = cv2.triangulatePoints(P1, P2, points1.T, points2.T)
        points_3d = points_4d[:3] / points_4d[3]

        return points_3d.T


class VisualSLAM:
    """Main Visual SLAM class integrating all components"""

    def __init__(self, camera_params: CameraParams):
        self.tracker = FeatureTracker()
        self.odometry = VisualOdometry(camera_params)
        self.mapping = Mapping(camera_params)
        self.prev_frame = None
        self.prev_features = None
        self.map_points = []
        self.keyframes = []

    def process_frame(self, frame: np.ndarray) -> Tuple[np.ndarray, List[np.ndarray]]:
        """Process a new frame and return the current pose and map points"""
        if self.prev_frame is None:
            # First frame: just detect features and report the identity pose
            self.prev_frame = frame
            self.prev_features = self.tracker.detect_features(frame)
            return np.eye(4), []

        # Track features from the previous frame into the current one
        prev_pts, curr_pts = self.tracker.track_features(
            self.prev_frame, frame, self.prev_features
        )

        if prev_pts is not None and len(prev_pts) >= 8:
            # Estimate motion (the essential matrix needs >= 8 correspondences)
            pose = self.odometry.estimate_motion(prev_pts, curr_pts)

            # Triangulate new points if this is a keyframe.
            # Using the identity for the first camera is a simplification:
            # it triangulates against the world origin rather than the
            # previous keyframe's pose.
            if self._is_keyframe(pose):
                points_3d = self.mapping.triangulate_points(
                    prev_pts, curr_pts,
                    np.eye(4), pose
                )
                self.map_points.extend(points_3d.tolist())
                self.keyframes.append({
                    'frame': frame,
                    'pose': pose,
                    'points': curr_pts
                })
        else:
            # Tracking failed: keep the last pose estimate
            pose = self.odometry.pose

        # Update the previous frame and re-detect features for the next step
        self.prev_frame = frame
        self.prev_features = self.tracker.detect_features(frame)

        return pose, self.map_points

    def _is_keyframe(self, pose: np.ndarray, min_distance: float = 0.3) -> bool:
        """Select a keyframe when the camera has moved far enough"""
        if not self.keyframes:
            return True

        last_pose = self.keyframes[-1]['pose']
        distance = np.linalg.norm(pose[:3, 3] - last_pose[:3, 3])
        return distance > min_distance

    def visualize(self):
        """Visualize the camera trajectory and 3D map"""
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')

        # Plot trajectory
        trajectory = np.array(self.odometry.trajectory)
        ax.plot(trajectory[:, 0], trajectory[:, 1], trajectory[:, 2], 'r-')

        # Plot map points
        if self.map_points:
            points = np.array(self.map_points)
            ax.scatter(points[:, 0], points[:, 1], points[:, 2], c='blue', s=1)

        ax.set_xlabel('X')
        ax.set_ylabel('Y')
        ax.set_zlabel('Z')
        plt.show()


def demo_visual_slam():
    """Demo function to test Visual SLAM with video input"""
    # Example camera parameters (adjust according to your camera)
    camera_params = CameraParams(
        fx=525.0,       # focal length x
        fy=525.0,       # focal length y
        cx=320.0,       # principal point x
        cy=240.0,       # principal point y
        baseline=0.075  # for stereo setup
    )

    slam = VisualSLAM(camera_params)

    # Open video capture (adjust source as needed)
    cap = cv2.VideoCapture(0)  # Use 0 for webcam

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Process frame
            pose, map_points = slam.process_frame(frame)

            # Visualize current frame with tracked features
            if slam.prev_features is not None:
                for pt in slam.prev_features:
                    pt = pt.ravel()
                    cv2.circle(frame, (int(pt[0]), int(pt[1])), 3, (0, 255, 0), -1)

            # Display frame; press 'q' to quit
            cv2.imshow('Frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()

    # Visualize final trajectory and map
    slam.visualize()


if __name__ == "__main__":
    demo_visual_slam()
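For batch experiments, the accumulated trajectory can be exported for later analysis. A minimal sketch (the output name `trajectory.txt` is a placeholder):

```python
import numpy as np
from visual_slam import CameraParams, VisualSLAM

slam = VisualSLAM(CameraParams(fx=525.0, fy=525.0, cx=320.0, cy=240.0))
# ... run slam.process_frame(frame) over a sequence here ...

# odometry.trajectory holds one (x, y, z) camera position per processed frame
positions = np.array(slam.odometry.trajectory)
np.savetxt("trajectory.txt", positions)  # placeholder output path
```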
