Fix _run_rapidocr: rapidocr API
This commit is contained in:
parent
1a72168f98
commit
b344ef6f59
@@ -157,13 +157,12 @@ def process_pdf(pdf_path: Path, out_dir: Path, use_qwen: bool = False, use_tilin
|
|||||||
def _run_rapidocr(img_path: Path):
|
def _run_rapidocr(img_path: Path):
|
||||||
res = engine(img_path)
|
res = engine(img_path)
|
||||||
ocr_lines = []
|
ocr_lines = []
|
||||||
if res and res[0] is not None:
|
if res and res.txts is not None:
|
||||||
for item in res[0]:
|
for txt, box, score in zip(res.txts, res.boxes, res.scores):
|
||||||
box, txt, score = item
|
|
||||||
ocr_lines.append({
|
ocr_lines.append({
|
||||||
"text": txt,
|
"text": txt,
|
||||||
"confidence": float(score),
|
"confidence": float(score),
|
||||||
"bbox": box
|
"bbox": box.tolist() if hasattr(box, "tolist") else box
|
||||||
})
|
})
|
||||||
return ocr_lines
|
return ocr_lines
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user