Fix _run_rapidocr: adapt to the new rapidocr result API (res.txts / res.boxes / res.scores)

This commit is contained in:
keboss-m 2026-06-01 13:24:49 +03:00
parent 1a72168f98
commit b344ef6f59

View File

@@ -157,13 +157,12 @@ def process_pdf(pdf_path: Path, out_dir: Path, use_qwen: bool = False, use_tilin
def _run_rapidocr(img_path: Path):
    """Run the RapidOCR engine on one image and collect its recognised lines.

    Args:
        img_path: Path of the image handed to ``engine``.

    Returns:
        A list with one dict per recognised line, each holding ``"text"``,
        ``"confidence"`` (as a float) and ``"bbox"``. The list is empty when
        the engine result is falsy or carries no texts.
    """
    res = engine(img_path)

    # The result object exposes parallel ``txts`` / ``boxes`` / ``scores``
    # sequences instead of the older ``res[0]`` list of (box, txt, score)
    # triples; ``txts`` may be None when nothing was recognised.
    if not res or res.txts is None:
        return []

    return [
        {
            "text": txt,
            "confidence": float(score),
            # Array-like boxes are converted to plain nested lists
            # (presumably so the result can be serialised -- confirm with caller).
            "bbox": box.tolist() if hasattr(box, "tolist") else box,
        }
        for txt, box, score in zip(res.txts, res.boxes, res.scores)
    ]