Fix tiling OCR: Windows temp paths and rapidocr API

This commit is contained in:
keboss-m 2026-06-01 12:55:19 +03:00
parent 7596a2f1a1
commit 44f588d280
7 changed files with 10 additions and 10 deletions

View File

@ -15,6 +15,7 @@ import sys
import json import json
import re import re
import fitz import fitz
import tempfile
from pathlib import Path from pathlib import Path
from PIL import Image from PIL import Image
from rapidocr import RapidOCR from rapidocr import RapidOCR
@ -77,12 +78,11 @@ def run_tiling_ocr(img_path: Path, conf_threshold: float = 0.5):
tiles = _make_tiles(img, TILE_SIZE, TILE_OVERLAP) tiles = _make_tiles(img, TILE_SIZE, TILE_OVERLAP)
all_results = [] all_results = []
for off_x, off_y, crop in tiles: for off_x, off_y, crop in tiles:
tmp = f"/tmp/tile_ocr.png" tmp = Path(tempfile.gettempdir()) / "tile_ocr.png"
crop.save(tmp) crop.save(tmp)
res = engine(tmp) res = engine(tmp)
if res and res[0]: if res and res.txts is not None:
for item in res[0]: for txt, box, score in zip(res.txts, res.boxes, res.scores):
box, txt, score = item
if score < conf_threshold: if score < conf_threshold:
continue continue
shifted = [[pt[0] + off_x, pt[1] + off_y] for pt in box] shifted = [[pt[0] + off_x, pt[1] + off_y] for pt in box]

BIN
test_output/page_001.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
test_output/page_002.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 449 KiB

BIN
test_output/page_003.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 717 KiB

BIN
test_output/page_004.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 MiB

BIN
test_output/page_005.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 MiB

View File

@ -13,6 +13,7 @@ Tiling OCR для больших чертежей.
import sys import sys
import json import json
import re import re
import tempfile
from pathlib import Path from pathlib import Path
from typing import List, Dict, Tuple from typing import List, Dict, Tuple
from PIL import Image from PIL import Image
@ -80,21 +81,20 @@ def run_tiling_ocr(png_path: Path, tile_size: int = 2000, overlap: int = 200, co
for i, (off_x, off_y, crop) in enumerate(tiles, 1): for i, (off_x, off_y, crop) in enumerate(tiles, 1):
# Временно сохранить кроп # Временно сохранить кроп
tmp_path = f"/tmp/tile_{i:03d}.png" tmp_path = Path(tempfile.gettempdir()) / f"tile_{i:03d}.png"
crop.save(tmp_path) crop.save(tmp_path)
print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True) print(f" [{i}/{len(tiles)}] tile @ ({off_x}, {off_y}) size {crop.size} ...", end=" ", flush=True)
res = engine(tmp_path) res = engine(str(tmp_path))
tile_lines = 0 tile_lines = 0
if res and res[0]: if res and res.txts is not None:
for item in res[0]: for txt, box, score in zip(res.txts, res.boxes, res.scores):
box, txt, score = item
if score < conf_threshold: if score < conf_threshold:
continue continue
# Сдвинуть bbox на offset кропа # Сдвинуть bbox на offset кропа
shifted_box = [] shifted_box = []
for pt in box: for pt in box.tolist() if hasattr(box, "tolist") else box:
shifted_box.append([pt[0] + off_x, pt[1] + off_y]) shifted_box.append([pt[0] + off_x, pt[1] + off_y])
all_results.append({ all_results.append({
"text": txt, "text": txt,