diff --git a/process_any_pdf.py b/process_any_pdf.py index 332f706..01a7197 100644 --- a/process_any_pdf.py +++ b/process_any_pdf.py @@ -15,6 +15,7 @@ import sys import json import re import fitz +import tempfile from pathlib import Path from PIL import Image from rapidocr import RapidOCR @@ -77,12 +78,11 @@ def run_tiling_ocr(img_path: Path, conf_threshold: float = 0.5): tiles = _make_tiles(img, TILE_SIZE, TILE_OVERLAP) all_results = [] for off_x, off_y, crop in tiles: - tmp = f"/tmp/tile_ocr.png" + tmp = Path(tempfile.gettempdir()) / "tile_ocr.png" crop.save(tmp) res = engine(tmp) - if res and res[0]: - for item in res[0]: - box, txt, score = item + if res and res.txts is not None: + for txt, box, score in zip(res.txts, res.boxes, res.scores): if score < conf_threshold: continue shifted = [[pt[0] + off_x, pt[1] + off_y] for pt in box] diff --git a/test_output/page_001.png b/test_output/page_001.png new file mode 100644 index 0000000..57df497 Binary files /dev/null and b/test_output/page_001.png differ diff --git a/test_output/page_002.png b/test_output/page_002.png new file mode 100644 index 0000000..a584a02 Binary files /dev/null and b/test_output/page_002.png differ diff --git a/test_output/page_003.png b/test_output/page_003.png new file mode 100644 index 0000000..4fdfc26 Binary files /dev/null and b/test_output/page_003.png differ diff --git a/test_output/page_004.png b/test_output/page_004.png new file mode 100644 index 0000000..66f2d6f Binary files /dev/null and b/test_output/page_004.png differ diff --git a/test_output/page_005.png b/test_output/page_005.png new file mode 100644 index 0000000..57d1dca Binary files /dev/null and b/test_output/page_005.png differ diff --git a/tiling_ocr.py b/tiling_ocr.py index e142c14..6fcc3f5 100644 --- a/tiling_ocr.py +++ b/tiling_ocr.py @@ -13,6 +13,7 @@ Tiling OCR для больших чертежей. import sys import json import re +import tempfile from pathlib import Path from typing import List, Dict, Tuple from PIL import Image @@ -80,21 +81,20 @@ def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200, co for i, (off_x, off_y, crop) in enumerate(tiles, 1): # Временно сохранить кроп - tmp_path = f"/tmp/tile_{i:03d}.png" + tmp_path = Path(tempfile.gettempdir()) / f"tile_{i:03d}.png" crop.save(tmp_path) print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True) - res = engine(tmp_path) + res = engine(str(tmp_path)) tile_lines = 0 - if res and res[0]: - for item in res[0]: - box, txt, score = item + if res and res.txts is not None: + for txt, box, score in zip(res.txts, res.boxes, res.scores): if score < conf_threshold: continue # Сдвинуть bbox на offset кропа shifted_box = [] - for pt in box: + for pt in box.tolist() if hasattr(box, "tolist") else box: shifted_box.append([pt[0] + off_x, pt[1] + off_y]) all_results.append({ "text": txt,