Fix tiling OCR: Windows temp paths and rapidocr API
This commit is contained in:
parent
7596a2f1a1
commit
44f588d280
@@ -15,6 +15,7 @@ import sys
|
||||
import json
|
||||
import re
|
||||
import fitz
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from PIL import Image
|
||||
from rapidocr import RapidOCR
|
||||
@@ -77,12 +78,11 @@ def run_tiling_ocr(img_path: Path, conf_threshold: float = 0.5):
|
||||
tiles = _make_tiles(img, TILE_SIZE, TILE_OVERLAP)
|
||||
all_results = []
|
||||
for off_x, off_y, crop in tiles:
|
||||
- tmp = f"/tmp/tile_ocr.png"
|
||||
+ tmp = Path(tempfile.gettempdir()) / "tile_ocr.png"
|
||||
crop.save(tmp)
|
||||
res = engine(tmp)
|
||||
- if res and res[0]:
|
||||
- for item in res[0]:
|
||||
- box, txt, score = item
|
||||
+ if res and res.txts is not None:
|
||||
+ for txt, box, score in zip(res.txts, res.boxes, res.scores):
|
||||
if score < conf_threshold:
|
||||
continue
|
||||
shifted = [[pt[0] + off_x, pt[1] + off_y] for pt in box]
|
||||
|
||||
BIN
test_output/page_001.png
Normal file
BIN
test_output/page_001.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 32 KiB |
BIN
test_output/page_002.png
Normal file
BIN
test_output/page_002.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 449 KiB |
BIN
test_output/page_003.png
Normal file
BIN
test_output/page_003.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 717 KiB |
BIN
test_output/page_004.png
Normal file
BIN
test_output/page_004.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 2.5 MiB |
BIN
test_output/page_005.png
Normal file
BIN
test_output/page_005.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.2 MiB |
@@ -13,6 +13,7 @@ Tiling OCR для больших чертежей.
|
||||
import sys
|
||||
import json
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Tuple
|
||||
from PIL import Image
|
||||
@@ -80,21 +81,20 @@ def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200, co
|
||||
|
||||
for i, (off_x, off_y, crop) in enumerate(tiles, 1):
|
||||
# Временно сохранить кроп
|
||||
- tmp_path = f"/tmp/tile_{i:03d}.png"
|
||||
+ tmp_path = Path(tempfile.gettempdir()) / f"tile_{i:03d}.png"
|
||||
crop.save(tmp_path)
|
||||
|
||||
print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
|
||||
- res = engine(tmp_path)
|
||||
+ res = engine(str(tmp_path))
|
||||
|
||||
tile_lines = 0
|
||||
- if res and res[0]:
|
||||
- for item in res[0]:
|
||||
- box, txt, score = item
|
||||
+ if res and res.txts is not None:
|
||||
+ for txt, box, score in zip(res.txts, res.boxes, res.scores):
|
||||
if score < conf_threshold:
|
||||
continue
|
||||
# Сдвинуть bbox на offset кропа
|
||||
shifted_box = []
|
||||
- for pt in box:
|
||||
+ for pt in box.tolist() if hasattr(box, "tolist") else box:
|
||||
shifted_box.append([pt[0] + off_x, pt[1] + off_y])
|
||||
all_results.append({
|
||||
"text": txt,
|
||||
|
||||
Loading…
Reference in New Issue
Block a user