From b344ef6f592ca6411cd1a471998cd5e10f4a7097 Mon Sep 17 00:00:00 2001 From: keboss-m <85340750+keboss-m@users.noreply.github.com> Date: Mon, 1 Jun 2026 13:24:49 +0300 Subject: [PATCH] Fix _run_rapidocr: read results via rapidocr's txts/boxes/scores API --- process_any_pdf.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/process_any_pdf.py b/process_any_pdf.py index 01a7197..218d94a 100644 --- a/process_any_pdf.py +++ b/process_any_pdf.py @@ -157,13 +157,12 @@ def process_pdf(pdf_path: Path, out_dir: Path, use_qwen: bool = False, use_tilin def _run_rapidocr(img_path: Path): res = engine(img_path) ocr_lines = [] - if res and res[0] is not None: - for item in res[0]: - box, txt, score = item + if res and res.txts is not None: + for txt, box, score in zip(res.txts, res.boxes, res.scores): ocr_lines.append({ "text": txt, "confidence": float(score), - "bbox": box + "bbox": box.tolist() if hasattr(box, "tolist") else box }) return ocr_lines