-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbasic.py
More file actions
40 lines (33 loc) · 1.43 KB
/
basic.py
File metadata and controls
40 lines (33 loc) · 1.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import os
import tkinter as tk
from tkinter import filedialog, messagebox
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
# Build the OCR predictor once, at import time, so that every call to
# extract_text_from_file() reuses the same instance.
# NOTE(review): pretrained=True presumably loads (and on first use may
# download) pretrained weights - confirm against the docTR documentation.
model = ocr_predictor(pretrained=True)
def extract_text_from_file(file_path):
    """Run OCR on one image or PDF file and return the rendered text.

    The loader is chosen from the file name: names ending in ``.png``,
    ``.jpg`` or ``.jpeg`` (compared case-insensitively) are read as images;
    every other name is handed to the PDF loader.

    Args:
        file_path: Path of the file to process.

    Returns:
        The string produced by rendering the OCR result. If anything goes
        wrong, an ``"Error processing ..."`` message is returned instead of
        raising, so the caller can display it directly.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    try:
        # Choose the loader from the lower-cased file name.
        if file_path.lower().endswith(image_suffixes):
            pages = DocumentFile.from_images(file_path)
        else:
            pages = DocumentFile.from_pdf(file_path)
        # Run the module-level predictor and flatten its result to text.
        return model(pages).render()
    except Exception as e:
        # Best-effort by design: report the failure as text rather than raise.
        return f"Error processing {file_path}: {str(e)}"
def browse_files():
    """Ask the user for image/PDF files, OCR each one and display the text.

    Opens a multi-selection file dialog restricted to JPG, JPEG, PNG and PDF
    files. If the user cancels or selects nothing, the function returns
    without touching the text box, so previously extracted text is kept.
    Otherwise the text box is cleared and, for each selected file, a header
    line with the file's base name is appended, followed by whatever
    extract_text_from_file() returns for it (OCR text or an error message).

    Returns:
        None. Results are written to the module-level ``text_box`` widget.
    """
    file_paths = filedialog.askopenfilenames(
        title="Select Files (Image or PDF)",
        filetypes=[("Image and PDF files", "*.jpg *.jpeg *.png *.pdf")],
    )
    # A cancelled dialog yields an empty selection; bail out before clearing
    # so that earlier results are not wiped for nothing.
    if not file_paths:
        return

    text_box.delete("1.0", tk.END)  # Clear existing text
    for path in file_paths:
        text_box.insert(tk.END, f"\n--- Extracted from: {os.path.basename(path)} ---\n")
        extracted = extract_text_from_file(path)
        text_box.insert(tk.END, extracted + "\n")
# Main window: a file-picker button above a text area that shows the results.
root = tk.Tk()
root.geometry("800x600")
root.title("Basic OCR Extractor using Doctr")

# Create both widgets first, then lay them out top-to-bottom in the same
# order as before (button, then text area).
btn = tk.Button(
    master=root,
    text="Select Files (IMG, PDF)",
    command=browse_files,
)
text_box = tk.Text(
    master=root,
    wrap="word",
    font=("Courier", 10),
)
btn.pack(pady=10)
text_box.pack(fill="both", expand=True)

# Hand control to the Tk event loop.
root.mainloop()