Fix tiling OCR: Windows temp paths and rapidocr API

This commit is contained in:
keboss-m 2026-06-01 12:55:19 +03:00
parent 7596a2f1a1
commit 44f588d280
7 changed files with 10 additions and 10 deletions

View File

@@ -15,6 +15,7 @@ import sys
import json
import re
import fitz
import tempfile
from pathlib import Path
from PIL import Image
from rapidocr import RapidOCR
@@ -77,12 +78,11 @@ def run_tiling_ocr(img_path: Path, conf_threshold: float = 0.5):
tiles = _make_tiles(img, TILE_SIZE, TILE_OVERLAP)
all_results = []
for off_x, off_y, crop in tiles:
tmp = f"/tmp/tile_ocr.png"
tmp = Path(tempfile.gettempdir()) / "tile_ocr.png"
crop.save(tmp)
res = engine(tmp)
if res and res[0]:
for item in res[0]:
box, txt, score = item
if res and res.txts is not None:
for txt, box, score in zip(res.txts, res.boxes, res.scores):
if score < conf_threshold:
continue
shifted = [[pt[0] + off_x, pt[1] + off_y] for pt in box]

BIN
test_output/page_001.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
test_output/page_002.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 449 KiB

BIN
test_output/page_003.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 717 KiB

BIN
test_output/page_004.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

BIN
test_output/page_005.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 MiB

View File

@@ -13,6 +13,7 @@ Tiling OCR для больших чертежей.
import sys
import json
import re
import tempfile
from pathlib import Path
from typing import List, Dict, Tuple
from PIL import Image
@@ -80,21 +81,20 @@ def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200, co
for i, (off_x, off_y, crop) in enumerate(tiles, 1):
# Временно сохранить кроп
tmp_path = f"/tmp/tile_{i:03d}.png"
tmp_path = Path(tempfile.gettempdir()) / f"tile_{i:03d}.png"
crop.save(tmp_path)
print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
res = engine(tmp_path)
res = engine(str(tmp_path))
tile_lines = 0
if res and res[0]:
for item in res[0]:
box, txt, score = item
if res and res.txts is not None:
for txt, box, score in zip(res.txts, res.boxes, res.scores):
if score < conf_threshold:
continue
# Сдвинуть bbox на offset кропа
shifted_box = []
for pt in box:
for pt in box.tolist() if hasattr(box, "tolist") else box:
shifted_box.append([pt[0] + off_x, pt[1] + off_y])
all_results.append({
"text": txt,