We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent b006fd5 commit a37ae5d (Copy full SHA for a37ae5d)
1 file changed
mindee/parsing/common/document.py
@@ -62,17 +62,18 @@ def __str__(self) -> str:
62
def _inject_full_text_ocr(self, raw_prediction: StringDict) -> None:
63
pages = raw_prediction.get("inference", {}).get("pages", [])
64
65
+ # check for: empty, missing, or null
66
if (
67
not pages
- or "extras" not in pages[0]
68
- or "full_text_ocr" not in pages[0]["extras"]
+ or not pages[0].get("extras", None)
69
+ or not pages[0]["extras"].get("full_text_ocr", None)
70
):
71
return
72
73
full_text_content = "\n".join(
74
page["extras"]["full_text_ocr"]["content"]
75
for page in pages
- if "extras" in page and "full_text_ocr" in page["extras"]
76
+ if page.get("extras", None) and page["extras"].get("full_text_ocr", None)
77
)
78
79
artificial_text_obj = {"content": full_text_content}
0 commit comments