Fix _run_rapidocr: adapt to the new rapidocr result API (res.txts / res.boxes / res.scores)
This commit is contained in:
parent
1a72168f98
commit
b344ef6f59
@ -157,13 +157,12 @@ def process_pdf(pdf_path: Path, out_dir: Path, use_qwen: bool = False, use_tilin
|
||||
def _run_rapidocr(img_path: Path):
|
||||
res = engine(img_path)
|
||||
ocr_lines = []
|
||||
if res and res[0] is not None:
|
||||
for item in res[0]:
|
||||
box, txt, score = item
|
||||
if res and res.txts is not None:
|
||||
for txt, box, score in zip(res.txts, res.boxes, res.scores):
|
||||
ocr_lines.append({
|
||||
"text": txt,
|
||||
"confidence": float(score),
|
||||
"bbox": box
|
||||
"bbox": box.tolist() if hasattr(box, "tolist") else box
|
||||
})
|
||||
return ocr_lines
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user