-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapi.py
More file actions
112 lines (94 loc) · 3.85 KB
/
api.py
File metadata and controls
112 lines (94 loc) · 3.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from fastapi import FastAPI, File, UploadFile, HTTPException, Query
from uuid import uuid4
import shutil
from pathlib import Path
from pydantic import BaseModel, Field
from typing import Dict, Any, List, Optional
from core.document_pipeline import DocumentPipelineManager
from core.action_generator import ACTION_GENERATORS, actions_for_other
###### Load shared components and initialize FastAPI app ######
app = FastAPI()

# In-memory store keyed by generated document id; the analyze endpoint writes
# entries here and the GET endpoints read them back. Nothing is persisted.
documents_db: dict[str, Any] = {}

# Document type labels, each paired with a plain-language description.
# NOTE(review): not referenced elsewhere in the visible part of this module;
# presumably consumed by the classification step - confirm before removing.
LABELS_WITH_DESCRIPTIONS = {
    "Invoice": (
        "A bill for goods or services, typically including vendor, "
        "amount, due date, and line items."
    ),
    "Contract": (
        "A legal agreement between parties, containing terms, dates, "
        "and responsibilities."
    ),
    "Earnings": (
        "A financial or business report summarizing revenue, profits, "
        "expenses, and other key metrics."
    ),
    "Other": "Any other type of document that does not fit the above categories.",
}

# Single pipeline instance shared by every request handler in this module.
pipeline = DocumentPipelineManager()
# Field descriptions for DocumentEntry, kept as module-level constants so the
# model definition stays compact. The text is emitted in the generated API
# schema, so it is reproduced verbatim.
_CLASSIFICATION_DESCRIPTION = """
The classification for the document, including:
- 'type': the predicted document type ('Invoice', 'Contract', 'Earnings' or 'Other')
- 'confidence': a score between 0 and 1 indicating model certainty
"""

_METADATA_DESCRIPTION = """
Structured metadata fields extracted from the document, tailored to the document type.
For example:
- Invoice → vendor, amount, due date, line items
- Contract → parties, effective/termination dates, key terms
- Earnings Report → reporting period, key metrics, executive summary
- Other → summary
"""


class DocumentEntry(BaseModel):
    # Stored record for one analyzed document: its generated id, the
    # classification result, and the extracted metadata. All three fields
    # are required (no defaults).
    id: str = Field(..., description="A unique identifier for the uploaded document.")
    classification: dict[str, Any] = Field(..., description=_CLASSIFICATION_DESCRIPTION)
    metadata: dict[str, Any] = Field(..., description=_METADATA_DESCRIPTION)
class DocumentAction(BaseModel):
    # One suggested action for a stored document; this is the item type of
    # the list returned by GET /documents/{id}/actions.
    type: str
    description: str
    # Optional; omitted deadlines are reported as None.
    deadline: Optional[str] = None
    # Optional; actions that do not specify a priority default to "medium".
    priority: Optional[str] = "medium"
@app.post("/documents/analyze")
async def analyze_document(file: UploadFile = File(...)):
    """Classify an uploaded document, extract its metadata, and store the result.

    The upload is written to a uniquely named temporary file under ``tmp/``,
    run through the shared pipeline (load, classify, extract metadata), and
    recorded in ``documents_db`` under a freshly generated id. The temporary
    file is removed once processing finishes, whether it succeeded or not.

    Args:
        file: The multipart upload to analyze.

    Returns:
        A dict with the keys ``status``, ``document_id``, ``classification``
        and ``metadata``.

    Raises:
        HTTPException: Status 500 carrying the underlying error message when
            any step of saving or processing the document fails.
    """
    doc_id = str(uuid4())

    # file.filename is supplied by the client and may be missing. Keep only its
    # final path component so values such as "../../x" or "/etc/x" cannot
    # escape tmp/, and prefix the document id so two uploads sharing a name
    # never overwrite each other. The original suffix is preserved in case the
    # loader selects a parser by file extension.
    original_name = Path(file.filename or "").name or "upload"
    tmp_path = Path("tmp") / f"{doc_id}_{original_name}"

    try:
        tmp_path.parent.mkdir(exist_ok=True)
        with open(tmp_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        pages = pipeline.load_document(str(tmp_path))
        classification_result = pipeline.classify(pages)
        metadata_result = pipeline.extract_metadata(pages, classification_result["type"])

        # The extractor may return either a pydantic model or a plain mapping;
        # store a plain dict in both cases.
        if hasattr(metadata_result, "model_dump"):
            metadata_result = metadata_result.model_dump()

        entry = DocumentEntry(
            id=doc_id,
            classification=classification_result,
            metadata=metadata_result,
        )
        documents_db[doc_id] = entry

        return {
            "status": "success",
            "document_id": doc_id,
            "classification": entry.classification,
            "metadata": entry.metadata,
        }
    except HTTPException:
        # Already a well-formed HTTP error; do not re-wrap it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to process document: {e}") from e
    finally:
        # Best-effort cleanup: the file may never have been created, or may
        # still be held open on some platforms. A cleanup failure must not
        # mask the real outcome of the request.
        try:
            tmp_path.unlink()
        except OSError:
            pass
@app.get("/documents/{id}", response_model=DocumentEntry)
def get_document(id: str):
    # Look up a previously analyzed document by the id returned from the
    # analyze endpoint; respond with 404 when no entry is stored under it.
    stored_entry = documents_db.get(id)
    if stored_entry:
        return stored_entry
    raise HTTPException(status_code=404, detail="Document not found")
@app.get("/documents/{id}/actions", response_model=List[DocumentAction])
def get_actions(
    id: str,
    priority: Optional[str] = Query(None, description="Filter actions by priority (e.g., high, medium, low)"),
):
    """List the suggested actions for a stored document.

    The action generator is chosen from ``ACTION_GENERATORS`` by the
    document's classified type, falling back to ``actions_for_other`` for
    types without a dedicated generator. It is called with the document's
    stored metadata.

    Args:
        id: Identifier of a previously analyzed document.
        priority: When given and non-empty, only actions whose priority equals
            this value (exact, case-sensitive match) are returned.

    Returns:
        The generated actions, optionally filtered by priority.

    Raises:
        HTTPException: Status 404 when no document is stored under ``id``.
    """
    doc = documents_db.get(id)
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")

    doc_type = doc.classification.get("type")
    generator = ACTION_GENERATORS.get(doc_type, actions_for_other)
    actions = generator(doc.metadata)

    # Apply optional filters
    if priority:
        # An action that omits "priority" is serialized with DocumentAction's
        # default of "medium", so filter against that same default; otherwise
        # ?priority=medium would hide actions the API reports as medium.
        default_priority = "medium"
        actions = [a for a in actions if a.get("priority", default_priority) == priority]

    return actions
if __name__ == "__main__":
    # Development entry point: serve this module's ``app`` on localhost only,
    # with auto-reload enabled. uvicorn is imported here so that importing
    # this module does not require it.
    import uvicorn

    uvicorn.run(
        "api:app",
        host="127.0.0.1",
        port=8000,
        reload=True,
    )