-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
364 lines (284 loc) · 13.4 KB
/
main.py
File metadata and controls
364 lines (284 loc) · 13.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
import tkinter as tk
from tkinter import filedialog, ttk
import cv2
from PIL import Image, ImageTk
import argparse
import sys
import os
class FFmpegCropTool:
    """Tk window for drawing crop rectangles on one frame of a video.

    A single frame of the video is shown (scaled down to fit the canvas,
    never scaled up). Every rectangle dragged with the left mouse button is
    stored in *image* pixel coordinates as ``(x, y, w, h)`` and written to
    the output panel as ``crop=w:h:x:y``; a button per rectangle copies the
    ``w:h:x:y`` part to the clipboard.
    """

    # Frame rate assumed when the video does not report a usable one.
    DEFAULT_FPS = 30.0
    # Canvas size assumed before Tk has laid the canvas out.
    FALLBACK_CANVAS_SIZE = (800, 600)

    def __init__(self, root, video_path=None, frame_num=1):
        """Build the UI and show ``frame_num`` of ``video_path``.

        Args:
            root: The Tk root (or toplevel) window that hosts the tool.
            video_path: Path of the video to open. When falsy, a file
                dialog is scheduled shortly after construction instead.
            frame_num: 1-based number of the frame to display.
        """
        self.root = root
        self.root.title("FFmpeg Crop Tool")

        self.video_path = video_path
        self.frame_num = frame_num
        self.cap = None
        self.original_image = None  # RGB frame at full resolution
        self.display_image = None   # PIL image scaled for the canvas
        self.photo = None           # Tk image; referenced here so it stays alive
        self.scale_factor = 1.0     # canvas pixels per image pixel
        # Top-left corner of the displayed image inside the canvas.
        self.img_offset_x = 0
        self.img_offset_y = 0
        self.rectangles = []        # list of (x, y, w, h) in image pixels
        self.start_x = None
        self.start_y = None
        self.current_rect = None    # canvas id of the rectangle being dragged

        self.setup_ui()

        if self.video_path:
            self.load_video()
        else:
            # Delay the dialog slightly so the main window is shown first.
            self.root.after(100, self.open_file_dialog)

    def setup_ui(self):
        """Create the control bar, the image canvas and the output panel."""
        # Control panel (top)
        control_frame = ttk.Frame(self.root, padding="10")
        control_frame.pack(side=tk.TOP, fill=tk.X)

        ttk.Button(control_frame, text="Open Video", command=self.open_file_dialog).pack(side=tk.LEFT, padx=5)
        ttk.Label(control_frame, text="Frame/Time:").pack(side=tk.LEFT, padx=5)

        self.frame_entry = ttk.Entry(control_frame, width=10)
        self.frame_entry.insert(0, str(self.frame_num))
        self.frame_entry.pack(side=tk.LEFT, padx=5)
        # Pressing Enter in the entry behaves like clicking "Go".
        self.frame_entry.bind("<Return>", lambda event: self.reload_frame())

        ttk.Button(control_frame, text="Go", command=self.reload_frame).pack(side=tk.LEFT)
        ttk.Button(control_frame, text="Reset Crops", command=self.reset_crops).pack(side=tk.LEFT, padx=20)

        self.coord_label = ttk.Label(control_frame, text="Mouse: (0, 0)")
        self.coord_label.pack(side=tk.RIGHT, padx=10)

        # Canvas showing the frame
        self.canvas_frame = ttk.Frame(self.root)
        self.canvas_frame.pack(side=tk.TOP, fill=tk.BOTH, expand=True)

        self.canvas = tk.Canvas(self.canvas_frame, bg="black", cursor="cross")
        self.canvas.pack(fill=tk.BOTH, expand=True)
        self.canvas.bind("<ButtonPress-1>", self.on_mouse_down)
        self.canvas.bind("<B1-Motion>", self.on_mouse_drag)
        self.canvas.bind("<ButtonRelease-1>", self.on_mouse_up)
        self.canvas.bind("<Motion>", self.update_mouse_coords)
        self.canvas.bind("<Configure>", self.on_resize)

        # Output panel (bottom)
        output_frame = ttk.LabelFrame(self.root, text="FFmpeg Crop Parameters", padding="10")
        output_frame.pack(side=tk.BOTTOM, fill=tk.X)

        # Row of per-rectangle "copy" buttons
        self.ranges_frame = ttk.Frame(output_frame)
        self.ranges_frame.pack(side=tk.TOP, fill=tk.X, pady=(0, 5))

        self.output_text = tk.Text(output_frame, height=5)
        self.output_text.pack(fill=tk.X)

    def open_file_dialog(self):
        """Ask the user for a video file and load it if one was chosen."""
        filename = filedialog.askopenfilename(filetypes=[("Video files", "*.mp4 *.avi *.mkv *.mov *.flv *.wmv")])
        if filename:
            self.video_path = filename
            self.load_video()

    def _current_fps(self):
        """Return the video's frame rate, or ``DEFAULT_FPS`` if unavailable.

        Uses the already-open capture when there is one; otherwise opens
        ``self.video_path`` just long enough to read the frame rate.
        """
        fps = self.DEFAULT_FPS
        if self.cap is not None and self.cap.isOpened():
            fps = self.cap.get(cv2.CAP_PROP_FPS)
        elif self.video_path:
            probe = cv2.VideoCapture(self.video_path)
            try:
                if probe.isOpened():
                    fps = probe.get(cv2.CAP_PROP_FPS)
            finally:
                probe.release()
        # "not (fps > 0)" also rejects NaN, which "fps <= 0" would let through.
        if not fps > 0:
            fps = self.DEFAULT_FPS
        return fps

    @staticmethod
    def _parse_frame_spec(text, fps):
        """Convert the Frame/Time entry text to a 1-based frame number.

        Accepted forms are ``HH:MM:SS``, ``MM:SS``, ``<seconds>s`` and a
        plain integer frame number. Time values are converted with ``fps``.

        Raises:
            ValueError: If ``text`` matches none of the accepted forms.
        """
        text = text.strip()
        try:
            if ":" in text:
                parts = [float(piece) for piece in text.split(":")]
                if len(parts) == 3:
                    seconds = parts[0] * 3600 + parts[1] * 60 + parts[2]
                elif len(parts) == 2:
                    seconds = parts[0] * 60 + parts[1]
                else:
                    raise ValueError(f"unsupported time format: {text!r}")
                frame = int(seconds * fps) + 1
            elif text.lower().endswith("s"):
                frame = int(float(text[:-1]) * fps) + 1
            else:
                frame = int(text)
        except OverflowError as exc:
            # int(inf), e.g. from "1e400s"; report it like any other bad input.
            raise ValueError(f"time value out of range: {text!r}") from exc
        # Frame numbers are 1-based; anything earlier means the first frame.
        return max(1, frame)

    def reload_frame(self):
        """Jump to the frame or timestamp typed into the Frame/Time entry.

        Input that cannot be parsed leaves ``self.frame_num`` unchanged; the
        video is reloaded in either case.
        """
        entered = self.frame_entry.get().strip()
        try:
            self.frame_num = self._parse_frame_spec(entered, self._current_fps())
        except ValueError:
            pass  # Keep the previous frame number.
        self.load_video()

    def load_video(self):
        """(Re)open ``self.video_path`` and display frame ``self.frame_num``.

        Does nothing when no path is set. Failures to open the file or to
        read the frame are reported on stdout and leave the current image
        untouched.
        """
        if not self.video_path:
            return

        if self.cap is not None:
            self.cap.release()
        self.cap = cv2.VideoCapture(self.video_path)
        if not self.cap.isOpened():
            print(f"Error: Could not open video {self.video_path}")
            return

        # OpenCV frame positions are 0-based, frame_num is 1-based.
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, max(0, self.frame_num - 1))
        ok, frame = self.cap.read()
        if not ok:
            print(f"Error: Could not read frame {self.frame_num}")
            return

        # OpenCV delivers BGR; PIL/Tk expect RGB.
        self.original_image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        self.display_image_on_canvas()

    def display_image_on_canvas(self):
        """Scale the current frame to the canvas, centre it and redraw crops."""
        if self.original_image is None:
            return

        canvas_width = self.canvas.winfo_width()
        canvas_height = self.canvas.winfo_height()
        if canvas_width <= 1 or canvas_height <= 1:
            # The canvas has not been laid out yet; use a nominal size.
            canvas_width, canvas_height = self.FALLBACK_CANVAS_SIZE

        img_h, img_w = self.original_image.shape[:2]

        # Fit inside the canvas but never upscale.
        self.scale_factor = min(canvas_width / img_w, canvas_height / img_h, 1.0)
        # Keep at least one pixel per axis so cv2.resize never gets a 0 size.
        new_w = max(1, int(img_w * self.scale_factor))
        new_h = max(1, int(img_h * self.scale_factor))

        resized = cv2.resize(self.original_image, (new_w, new_h), interpolation=cv2.INTER_AREA)
        self.display_image = Image.fromarray(resized)
        self.photo = ImageTk.PhotoImage(image=self.display_image)

        self.canvas.delete("all")

        # Centre the image and remember its offset for coordinate conversion.
        self.img_offset_x = (canvas_width - new_w) // 2
        self.img_offset_y = (canvas_height - new_h) // 2
        self.canvas.create_image(self.img_offset_x, self.img_offset_y, anchor=tk.NW, image=self.photo, tags="image")

        self.redraw_rectangles()

    def on_resize(self, event):
        """Redraw the frame whenever the canvas changes size."""
        if self.original_image is not None:
            # Not debounced: every <Configure> event triggers a full redraw.
            self.display_image_on_canvas()

    def canvas_to_image_coords(self, cx, cy):
        """Map a canvas point to image pixel coordinates.

        Returns:
            ``(ix, iy)`` clamped to ``[0, width]`` x ``[0, height]`` of the
            full-resolution frame, or ``(0, 0)`` when no frame is loaded or
            the scale factor is zero.
        """
        if self.original_image is None or self.scale_factor == 0:
            return 0, 0

        img_h, img_w = self.original_image.shape[:2]
        ix = int((cx - self.img_offset_x) / self.scale_factor)
        iy = int((cy - self.img_offset_y) / self.scale_factor)
        # The upper bounds are inclusive so a rectangle can reach the far edge.
        return max(0, min(ix, img_w)), max(0, min(iy, img_h))

    def update_mouse_coords(self, event):
        """Show the image coordinates under the mouse pointer."""
        if self.original_image is not None:
            ix, iy = self.canvas_to_image_coords(event.x, event.y)
            self.coord_label.config(text=f"Mouse: ({ix}, {iy})")

    def on_mouse_down(self, event):
        """Start dragging a new crop rectangle at the pointer position."""
        if self.original_image is None:
            return
        if self.current_rect is not None:
            # A previous drag never received its release event; drop its outline.
            self.canvas.delete(self.current_rect)
        self.start_x = event.x
        self.start_y = event.y
        self.current_rect = self.canvas.create_rectangle(
            self.start_x, self.start_y, self.start_x, self.start_y, outline="red", width=2
        )

    def on_mouse_drag(self, event):
        """Stretch the in-progress rectangle to the pointer position."""
        if self.current_rect is not None:
            self.canvas.coords(self.current_rect, self.start_x, self.start_y, event.x, event.y)
        self.update_mouse_coords(event)

    def on_mouse_up_fixed(self, event):
        """No-op placeholder kept so the class's set of public names is unchanged."""

    def on_mouse_up(self, event):
        """Finish the drag and record the rectangle in image coordinates.

        The temporary outline is removed; rectangles with zero width or
        height (for example a plain click) are discarded.
        """
        if self.current_rect is None:
            return

        c_x1, c_y1, c_x2, c_y2 = self.canvas.coords(self.current_rect)
        self.canvas.delete(self.current_rect)
        self.current_rect = None

        # Store image-space values so rectangles survive canvas resizes.
        ix1, iy1 = self.canvas_to_image_coords(c_x1, c_y1)
        ix2, iy2 = self.canvas_to_image_coords(c_x2, c_y2)
        x, y = min(ix1, ix2), min(iy1, iy2)
        w, h = abs(ix1 - ix2), abs(iy1 - iy2)

        if w > 0 and h > 0:
            self.rectangles.append((x, y, w, h))
            self.redraw_rectangles()
            self.update_output()

    def redraw_rectangles(self):
        """Redraw every stored rectangle (and its size label) on the canvas."""
        # All rectangle items share the "rect" tag so they can be replaced
        # without touching the image item.
        self.canvas.delete("rect")
        for x, y, w, h in self.rectangles:
            # Image coordinates -> canvas coordinates.
            cx1 = x * self.scale_factor + self.img_offset_x
            cy1 = y * self.scale_factor + self.img_offset_y
            cx2 = (x + w) * self.scale_factor + self.img_offset_x
            cy2 = (y + h) * self.scale_factor + self.img_offset_y
            self.canvas.create_rectangle(cx1, cy1, cx2, cy2, outline="red", width=2, tags="rect")
            self.canvas.create_text(cx1, cy1 - 10, text=f"{w}x{h}", fill="red", anchor=tk.SW, tags="rect")

    def copy_to_clipboard(self, text):
        """Replace the clipboard contents with ``text`` and log it to stdout."""
        self.root.clipboard_clear()
        self.root.clipboard_append(text)
        print(f"Copied to clipboard: {text}")

    def update_output(self):
        """Rebuild the crop text and the copy buttons from ``self.rectangles``."""
        self.output_text.delete("1.0", tk.END)
        for widget in self.ranges_frame.winfo_children():
            widget.destroy()

        for index, (x, y, w, h) in enumerate(self.rectangles, start=1):
            crop_str = f"{w}:{h}:{x}:{y}"
            self.output_text.insert(tk.END, f"crop={crop_str}\n")
            ttk.Button(
                self.ranges_frame,
                text=f"Range {index}: {crop_str}",
                # Bind the current value as a default; a plain closure would
                # see only the last crop_str of the loop.
                command=lambda s=crop_str: self.copy_to_clipboard(s),
            ).pack(side=tk.LEFT, padx=5)

    def reset_crops(self):
        """Discard all rectangles and clear the output panel and canvas."""
        self.rectangles = []
        self.update_output()
        self.redraw_rectangles()
def build_arg_parser():
    """Return the command-line parser for the crop tool.

    Options:
        --video_path: Path to the video file (optional).
        --frame: 1-based frame number to show, default 1.
    """
    parser = argparse.ArgumentParser(description="FFmpeg Crop Tool")
    parser.add_argument("--video_path", type=str, help="Path to video file")
    parser.add_argument("--frame", type=int, default=1, help="Frame number to extract (1-based)")
    return parser


def main(argv=None):
    """Parse the command line, open the crop tool window and run Tk's main loop.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.
    """
    args = build_arg_parser().parse_args(argv)

    root = tk.Tk()
    root.geometry("1000x800")
    # Hold the tool in a local for as long as the main loop runs.
    app = FFmpegCropTool(root, video_path=args.video_path, frame_num=args.frame)
    root.mainloop()


if __name__ == "__main__":
    main()