-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmain.py
104 lines (88 loc) · 3.63 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import argparse
import cv2
import moondream as md
from async_tracking import AsyncTracker
import rerun as rr
import numpy as np
def _bbox_outline(bbox):
    """Return the closed five-point outline of ``bbox`` as an ``(5, 2)`` array.

    ``bbox`` is read at indices 0..3. The corner labels below assume an
    (x_min, y_min, x_max, y_max) layout -- TODO confirm against AsyncTracker.
    The first corner is repeated at the end so a line strip draws a closed box.
    """
    return np.array([
        [bbox[0], bbox[1]],  # top-left
        [bbox[2], bbox[1]],  # top-right
        [bbox[2], bbox[3]],  # bottom-right
        [bbox[0], bbox[3]],  # bottom-left
        [bbox[0], bbox[1]],  # close the box
    ])


def _log_tracking(frame, state):
    """Log one camera frame and, when available, the tracking overlay to Rerun.

    Args:
        frame: BGR image as returned by ``cv2.VideoCapture.read``.
        state: Mapping returned by ``tracker.get_state()``; the keys ``'bbox'``
            and ``'kalman'`` are read here.
    """
    # OpenCV delivers BGR; the frame is converted to RGB before logging.
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    rr.log("camera/frame", rr.Image(frame_rgb))

    if state['bbox'] is None:
        return

    # Presumably the Kalman-filtered (x, y) center -- verify against AsyncTracker.
    center = state['kalman']
    bbox = state['bbox']
    # NOTE(review): truthiness is kept from the original; this assumes both
    # values are plain sequences (a multi-element ndarray would raise here).
    if not (center and bbox):
        return

    rr.log("camera/tracking/center",
           rr.Points2D(positions=np.array([center]),
                       colors=np.array([[255, 0, 0]])))
    rr.log("camera/tracking/bbox",
           rr.LineStrips2D(_bbox_outline(bbox),
                           colors=np.array([[0, 255, 0]])))

    # "\r" rewrites the same console line on every frame.
    print(f"\rTracking at ({center[0]:.1f}, {center[1]:.1f})",
          end="", flush=True)


def main():
    """Run the webcam object-tracking demo.

    Parses ``--headless``, opens webcam 0, builds a Moondream model from the
    key stored in ``api_key.txt``, asks the user what to track, then feeds
    frames to an ``AsyncTracker`` until a frame cannot be read or the user
    presses Ctrl+C. Unless ``--headless`` is given, every frame and the
    current tracking state are logged to a spawned Rerun viewer.

    Setup failures are reported on stdout and end the function early; the
    camera, tracker and Rerun connection are released on every exit path.
    """
    parser = argparse.ArgumentParser(description="Object tracking demo with Rerun visualization")
    # store_true defaults to False, so visualization is on unless this is passed.
    parser.add_argument("--headless", action="store_true",
                        help="Run without any visualization")
    args = parser.parse_args()

    # Initialize Rerun for visualization
    if not args.headless:
        rr.init("Object Tracking", spawn=True)
        print("Initialized Rerun visualization")

    # Bind both handles before the try block: the finally clause inspects them
    # even when setup aborts before either one has been created.
    cap = None
    tracker = None
    try:
        # Initialize camera
        cap = cv2.VideoCapture(0)
        if not cap.isOpened():
            print("Error: Cannot open webcam")
            return

        # Initialize Moondream model
        try:
            with open('api_key.txt', 'r') as f:
                api_key = f.read().strip()
            model = md.vl(api_key=api_key)
        except FileNotFoundError:
            print("Error: api_key.txt not found. Please create this file with your Moondream API key.")
            return
        except Exception as e:
            print(f"Error initializing Moondream model: {e}")
            return

        # Get tracking target
        prompt = input("What object would you like to track? ")

        # Create and start tracker
        tracker = AsyncTracker(model, prompt)
        tracker.start()

        print("\nPress Ctrl+C to stop tracking")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to grab frame")
                break

            # Process the frame through the tracker
            tracker.process_frame(frame)

            # Get current tracking state and visualize with Rerun
            if not args.headless:
                _log_tracking(frame, tracker.get_state())

    except KeyboardInterrupt:
        print("\nShutting down...")
    except Exception as e:
        # Top-level boundary: report the error and fall through to cleanup.
        print(f"\nUnexpected error: {e}")
    finally:
        # Clean up only what was actually created.
        print("\nCleaning up...")
        if tracker is not None:
            tracker.stop()
        if cap is not None:
            cap.release()
        if not args.headless:
            rr.disconnect()
# Script entry point: run the demo only when executed directly, not on import.
if __name__ == "__main__":
    main()