- FastAPI app with SQLite DB (projects, pages, issues, feedback)
- OpenSeadragon DZI viewer with inline SVG overlays
- Dashboard: upload, project list, tiling toggle, review mode
- Pipeline integration: tiling OCR → layout → elements → rules QC → DZI → DB
- Feedback collection: true_positive / false_positive / not_sure per issue
138 lines
5.2 KiB
Python
138 lines
5.2 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
Импорт существующего output_123 в backend БД.
|
||
|
||
Использование:
|
||
python import_existing.py
|
||
"""
|
||
|
||
import sys
|
||
import json
|
||
from pathlib import Path
|
||
|
||
# Put the sibling "backend" directory on sys.path so the `app` package can be imported
|
||
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
|
||
|
||
from app.database import SessionLocal, Base, engine
|
||
from app import models, crud, schemas
|
||
|
||
|
||
# Folder imported when the caller does not pass one (the location the script
# was originally written for).
_DEFAULT_OUTPUT_FOLDER = Path("/Users/kirillblinov/development/opencode/OCR/opencode/output_123")

# Page used in the printed viewer link whenever that page was actually imported.
_DEFAULT_VIEWER_PAGE = 5

# Keys of a QC item that are passed to crud.create_issue() as dedicated
# arguments (or deliberately dropped); everything else goes into extra_data.
_ISSUE_CORE_KEYS = frozenset({
    "type", "severity", "message", "page", "text", "confidence",
    "bbox", "bbox1", "bbox2", "bbox_dim",
})


def _load_json(path):
    """Return the parsed UTF-8 JSON document at *path*, or None if the file is absent."""
    if not path.exists():
        return None
    return json.loads(path.read_text(encoding="utf-8"))


def _read_image_size(png_path):
    """Return ``(width, height)`` in pixels of the image at *png_path*."""
    # Imported lazily so Pillow is only needed when a page image really exists.
    from PIL import Image

    with Image.open(png_path) as img:
        width, height = img.size
    return width, height


def _bbox_to_rect(bbox):
    """Normalise a QC bounding box to ``(x1, y1, x2, y2)``.

    A sequence of ``[x, y]`` points is reduced to its axis-aligned bounds; a
    flat sequence contributes its first four entries. Anything missing or
    malformed yields four ``None`` values instead of raising, so a single odd
    bbox cannot abort the whole import.
    """
    empty = (None, None, None, None)
    if not bbox or not isinstance(bbox, (list, tuple)):
        return empty
    if isinstance(bbox[0], (list, tuple)):
        try:
            xs = [point[0] for point in bbox]
            ys = [point[1] for point in bbox]
        except (IndexError, TypeError):
            return empty
        return min(xs), min(ys), max(xs), max(ys)
    if len(bbox) < 4:
        return empty
    return bbox[0], bbox[1], bbox[2], bbox[3]


def _import_pages(db, project, output_folder):
    """Create one page per entry of full_ocr_results.json; return the page numbers."""
    ocr = _load_json(output_folder / "full_ocr_results.json")
    if ocr is None:
        return []

    page_numbers = []
    for page_data in ocr.get("pages", []):
        page_num = page_data["page_number"]
        png_path = output_folder / f"page_{page_num:03d}.png"
        has_png = png_path.exists()
        width, height = _read_image_size(png_path) if has_png else (None, None)

        crud.create_page(
            db,
            project_id=project.id,
            page_number=page_num,
            png_path=str(png_path) if has_png else None,
            ocr_data=page_data,
            width=width,
            height=height,
        )
        page_numbers.append(page_num)
        print(f" [OK] Страница {page_num}: {width}x{height}")
    return page_numbers


def _import_issues(db, project, output_folder):
    """Create one issue per item of dimension_qc_report.json, if that file exists."""
    qc = _load_json(output_folder / "dimension_qc_report.json")
    if qc is None:
        return

    total_imported = 0
    for bucket in ("errors", "warnings", "infos"):
        for item in qc.get(bucket, []):
            page = crud.get_page_by_number(db, project.id, item["page"])
            # "bbox2" is never used as a location, it is only kept out of extra_data.
            x1, y1, x2, y2 = _bbox_to_rect(
                item.get("bbox") or item.get("bbox1") or item.get("bbox_dim")
            )
            crud.create_issue(
                db,
                project_id=project.id,
                page_id=page.id if page else None,
                issue_type=item["type"],
                severity=item["severity"],
                message=item["message"],
                bbox_x1=x1, bbox_y1=y1, bbox_x2=x2, bbox_y2=y2,
                dimension_text=item.get("text"),
                confidence=item.get("confidence"),
                extra_data={k: v for k, v in item.items() if k not in _ISSUE_CORE_KEYS},
            )
            total_imported += 1

    print(f"[OK] Импортировано замечаний: {total_imported}")


def _import_vlm_descriptions(db, project, output_folder):
    """Copy descriptions from vlm_descriptions.json onto the matching project pages."""
    vlm = _load_json(output_folder / "vlm_descriptions.json")
    if vlm is None:
        return

    descriptions = vlm.get("descriptions", [])
    updated = False
    for page in project.pages:
        # The first entry for a page wins.
        match = next(
            (entry for entry in descriptions if entry.get("page") == page.page_number),
            None,
        )
        if match is not None:
            page.vlm_description = match.get("description")
            updated = True
    if updated:
        # One commit for all pages instead of one per page.
        db.commit()
    print("[OK] VLM описания загружены")


def import_output_123(output_folder=None, pdf_filename="123.pdf", name="Test Project 123"):
    """Import an already processed output folder into the backend database.

    Creates a project, then loads whatever result files are present in the
    folder: ``full_ocr_results.json`` (pages, with sizes read from
    ``page_NNN.png`` when the image exists), ``dimension_qc_report.json``
    (issues) and ``vlm_descriptions.json`` (per-page descriptions). Progress
    and errors are printed to stdout.

    Args:
        output_folder: Folder to import (str or Path). Defaults to the
            original hard-coded output_123 location.
        pdf_filename: PDF file name recorded on the created project.
        name: Display name of the created project.

    Returns:
        The id of the created project, or None if the folder does not exist
        or the import failed (the error and traceback are printed).
    """
    output_folder = Path(output_folder) if output_folder is not None else _DEFAULT_OUTPUT_FOLDER
    if not output_folder.exists():
        print(f"[ERR] Не найден: {output_folder}")
        return None

    db = SessionLocal()
    try:
        project = crud.create_project(db, pdf_filename=pdf_filename, name=name)
        print(f"[INFO] Создан проект: ID={project.id}")

        # NOTE(review): the project is marked "completed" before any data is
        # loaded, as in the original; consider moving this after the imports
        # once the valid status values are confirmed.
        crud.update_project_status(db, project.id, "completed", output_folder=str(output_folder))

        page_numbers = _import_pages(db, project, output_folder)
        _import_issues(db, project, output_folder)
        _import_vlm_descriptions(db, project, output_folder)

        # Keep the historical link to page 5 when it exists; otherwise point
        # at the first imported page so the printed URL is not a dead link.
        viewer_page = _DEFAULT_VIEWER_PAGE
        if page_numbers and _DEFAULT_VIEWER_PAGE not in page_numbers:
            viewer_page = page_numbers[0]

        print(f"\n[INFO] Проект #{project.id} готов!")
        print(" Dashboard: http://localhost:8001/")
        print(f" Viewer: http://localhost:8001/viewer/{project.id}/{viewer_page}")
        print(f" Review: http://localhost:8001/review.html?project={project.id}")

        return project.id

    except Exception as e:
        # Top-level boundary of the script: report the failure, do not re-raise.
        print(f"[ERR] {e}")
        import traceback
        traceback.print_exc()
        # Discard uncommitted changes before the session is closed
        # (assumes SessionLocal yields a SQLAlchemy Session - TODO confirm).
        db.rollback()
        return None
    finally:
        db.close()
|
||
|
||
|
||
if __name__ == "__main__":
    # import_output_123() returns the new project id on success and None when
    # the folder is missing or the import failed; report that to the shell as
    # a non-zero exit status instead of always exiting with 0.
    sys.exit(0 if import_output_123() is not None else 1)
|