opencode/backend/import_existing.py
Кирилл Блинов f37c477a0a Add FastAPI backend with DZI viewer and feedback system
- FastAPI app with SQLite DB (projects, pages, issues, feedback)
- OpenSeadragon DZI viewer with inline SVG overlays
- Dashboard: upload, project list, tiling toggle, review mode
- Pipeline integration: tiling OCR → layout → elements → rules QC → DZI → DB
- Feedback collection: true_positive / false_positive / not_sure per issue
2026-06-01 12:29:41 +03:00

138 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Импорт существующего output_123 в backend БД.
Использование:
python import_existing.py
"""
import sys
import json
from pathlib import Path
# Добавить backend в path
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
from app.database import SessionLocal, Base, engine
from app import models, crud, schemas
# Default location of the pre-computed pipeline output imported by this script.
_DEFAULT_OUTPUT_FOLDER = Path(
    "/Users/kirillblinov/development/opencode/OCR/opencode/output_123"
)

# QC item keys that are either passed to ``crud.create_issue`` explicitly or
# are bbox variants; every other key of the item ends up in ``extra_data``.
# NOTE(review): "bbox2" is excluded here but never read anywhere in this
# script, so its value is not stored at all -- confirm this is intended.
_ISSUE_CORE_KEYS = frozenset({
    "type", "severity", "message", "page", "text", "confidence",
    "bbox", "bbox1", "bbox2", "bbox_dim",
})


def _bbox_to_rect(bbox):
    """Convert a QC bbox to ``(x1, y1, x2, y2)``, or ``None`` if impossible.

    Two shapes are understood:

    * a polygon -- a sequence of ``[x, y]`` points -- which is reduced to its
      axis-aligned bounding rectangle;
    * a flat sequence whose first four values are ``x1, y1, x2, y2``.

    ``None`` is returned for an empty/missing bbox and for any value that
    does not fit one of the shapes above.
    """
    if not bbox:
        return None
    try:
        if isinstance(bbox[0], (list, tuple)):
            xs = [point[0] for point in bbox]
            ys = [point[1] for point in bbox]
            return min(xs), min(ys), max(xs), max(ys)
        x1, y1, x2, y2 = bbox[:4]
        return x1, y1, x2, y2
    except (IndexError, KeyError, TypeError, ValueError):
        return None


def _import_pages(db, project_id, output_folder):
    """Create one page record per entry of ``full_ocr_results.json``, if present."""
    ocr_path = output_folder / "full_ocr_results.json"
    if not ocr_path.exists():
        return

    ocr = json.loads(ocr_path.read_text(encoding="utf-8"))

    # Imported lazily (once, not per page): Pillow is only needed when there
    # are OCR results to import.
    from PIL import Image

    for page_data in ocr.get("pages", []):
        page_num = page_data["page_number"]
        png_path = output_folder / f"page_{page_num:03d}.png"
        has_png = png_path.exists()

        # Page dimensions are read from the rendered PNG when it exists;
        # otherwise the page is stored without image path and size.
        width = height = None
        if has_png:
            with Image.open(png_path) as img:
                width, height = img.size

        crud.create_page(
            db,
            project_id=project_id,
            page_number=page_num,
            png_path=str(png_path) if has_png else None,
            ocr_data=page_data,
            width=width,
            height=height,
        )
        print(f" [OK] Страница {page_num}: {width}x{height}")


def _import_issues(db, project_id, output_folder):
    """Create one issue record per entry of ``dimension_qc_report.json``, if present."""
    qc_path = output_folder / "dimension_qc_report.json"
    if not qc_path.exists():
        return

    qc = json.loads(qc_path.read_text(encoding="utf-8"))

    # page number -> page id (or None), so each page is looked up only once
    # instead of once per issue.
    page_ids = {}
    total_imported = 0

    for section in ("errors", "warnings", "infos"):
        for item in qc.get(section, []):
            page_num = item["page"]
            if page_num not in page_ids:
                page = crud.get_page_by_number(db, project_id, page_num)
                page_ids[page_num] = page.id if page else None

            # The first non-empty bbox variant wins.
            bbox = item.get("bbox") or item.get("bbox1") or item.get("bbox_dim")
            rect = _bbox_to_rect(bbox)
            if rect is None:
                if bbox:
                    # Keep the issue, but without coordinates, rather than
                    # aborting the whole import on one malformed bbox.
                    print(f" [WARN] Некорректный bbox (страница {page_num}): {bbox!r}")
                rect = (None, None, None, None)
            x1, y1, x2, y2 = rect

            crud.create_issue(
                db,
                project_id=project_id,
                page_id=page_ids[page_num],
                issue_type=item["type"],
                # The severity stored is the item's own field, not the name
                # of the report section it was listed under.
                severity=item["severity"],
                message=item["message"],
                bbox_x1=x1, bbox_y1=y1, bbox_x2=x2, bbox_y2=y2,
                dimension_text=item.get("text"),
                confidence=item.get("confidence"),
                extra_data={
                    key: value
                    for key, value in item.items()
                    if key not in _ISSUE_CORE_KEYS
                },
            )
            total_imported += 1

    print(f"[OK] Импортировано замечаний: {total_imported}")


def _import_vlm_descriptions(db, project, output_folder):
    """Attach descriptions from ``vlm_descriptions.json`` to the project's pages, if present."""
    vlm_path = output_folder / "vlm_descriptions.json"
    if not vlm_path.exists():
        return

    vlm = json.loads(vlm_path.read_text(encoding="utf-8"))

    # Index descriptions by page number; ``setdefault`` keeps the first entry
    # for a page when the file lists the same page more than once.
    first_by_page = {}
    for item in vlm.get("descriptions", []):
        first_by_page.setdefault(item.get("page"), item)

    changed = False
    for page in project.pages:
        item = first_by_page.get(page.page_number)
        if item is not None:
            page.vlm_description = item.get("description")
            changed = True

    # One commit for all pages instead of one per page.
    if changed:
        db.commit()
    print("[OK] VLM описания загружены")


def import_output_123(output_folder=None, *, pdf_filename="123.pdf",
                      project_name="Test Project 123"):
    """Import an existing pipeline output folder into the backend database.

    Creates a project marked as ``"completed"``, then imports, each only if
    its file exists in the folder:

    * ``full_ocr_results.json`` -> pages (with ``page_NNN.png`` size if found),
    * ``dimension_qc_report.json`` -> issues,
    * ``vlm_descriptions.json`` -> per-page VLM descriptions.

    Args:
        output_folder: Folder to import (``str`` or ``Path``). Defaults to
            the original hard-coded ``output_123`` location.
        pdf_filename: PDF file name recorded for the created project.
        project_name: Display name of the created project.

    Returns:
        The id of the created project, or ``None`` when the folder does not
        exist or the import failed (the error and traceback are printed).
    """
    output_folder = (
        _DEFAULT_OUTPUT_FOLDER if output_folder is None else Path(output_folder)
    )
    if not output_folder.exists():
        print(f"[ERR] Не найден: {output_folder}")
        return None

    db = SessionLocal()
    try:
        project = crud.create_project(
            db,
            pdf_filename=pdf_filename,
            name=project_name,
        )
        print(f"[INFO] Создан проект: ID={project.id}")

        crud.update_project_status(
            db, project.id, "completed", output_folder=str(output_folder)
        )

        _import_pages(db, project.id, output_folder)
        _import_issues(db, project.id, output_folder)
        _import_vlm_descriptions(db, project, output_folder)

        print(f"\n[INFO] Проект #{project.id} готов!")
        print(f" Dashboard: http://localhost:8001/")
        # NOTE(review): the viewer link always points at page 5, whether or
        # not such a page was imported -- confirm this is intended.
        print(f" Viewer: http://localhost:8001/viewer/{project.id}/5")
        print(f" Review: http://localhost:8001/review.html?project={project.id}")
        return project.id
    except Exception as e:
        # Top-level boundary of a command-line script: report the failure
        # with its traceback instead of propagating it.
        print(f"[ERR] {e}")
        import traceback
        traceback.print_exc()
        return None
    finally:
        db.close()
if __name__ == "__main__":
    # import_output_123() returns the new project id on success and None when
    # the output folder is missing or the import reported an error; turn that
    # into the process exit status so shell callers can detect a failure.
    raise SystemExit(0 if import_output_123() is not None else 1)