def _run_rapidocr(img_path: Path):
    """Run the OCR engine on one image and return its recognized lines.

    Args:
        img_path: Path of the image handed to ``engine``.

    Returns:
        A list with one dict per recognized line, each holding ``"text"``,
        ``"confidence"`` (coerced to ``float``) and ``"bbox"``. The list is
        empty when the engine yields a falsy result or when any of the
        ``txts`` / ``boxes`` / ``scores`` fields is ``None``.

    Note:
        ``engine`` is not defined here; it is looked up in the surrounding
        scope. The result is expected to expose parallel ``txts``, ``boxes``
        and ``scores`` attributes.
    """
    res = engine(img_path)

    # Guard all three fields, not just ``txts``: zipping over a ``None``
    # field would raise TypeError. ``is None`` is used (rather than
    # truthiness) because ``boxes`` may be an array-like object.
    if not res or res.txts is None or res.boxes is None or res.scores is None:
        return []

    return [
        {
            "text": txt,
            "confidence": float(score),
            # Array-like boxes are converted to plain lists; anything
            # without ``tolist`` is passed through unchanged.
            "bbox": box.tolist() if hasattr(box, "tolist") else box,
        }
        for txt, box, score in zip(res.txts, res.boxes, res.scores)
    ]