opencode/backend/import_existing.py

138 lines
5.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Импорт существующего output_123 в backend БД.
Использование:
python import_existing.py
"""
import sys
import json
from pathlib import Path
# Добавить backend в path
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
from app.database import SessionLocal, Base, engine
from app import models, crud, schemas
# Default location of the previously generated output folder to import.
DEFAULT_OUTPUT_FOLDER = "/Users/kirillblinov/development/opencode/OCR/opencode/output_123"

# Keys of a QC item that are passed to ``crud.create_issue`` as dedicated
# arguments (or are bbox variants); everything else goes into ``extra_data``.
_ISSUE_CORE_KEYS = frozenset({
    "type", "severity", "message", "page", "text", "confidence",
    "bbox", "bbox1", "bbox2", "bbox_dim",
})


def _bbox_to_rect(bbox):
    """Normalize a QC bbox into an ``(x1, y1, x2, y2)`` tuple.

    Two input shapes are handled:

    * a sequence of points ``[[x, y], ...]`` - reduced to its bounding
      rectangle (min/max over all points);
    * a flat sequence - its first four elements are used as-is.

    A missing or empty bbox yields ``(None, None, None, None)``.
    """
    if not bbox:
        return None, None, None, None
    if isinstance(bbox[0], (list, tuple)):
        xs = [point[0] for point in bbox]
        ys = [point[1] for point in bbox]
        return min(xs), min(ys), max(xs), max(ys)
    return bbox[0], bbox[1], bbox[2], bbox[3]


def _image_size(png_path):
    """Return ``(width, height)`` of the image stored at *png_path*."""
    # Imported lazily so Pillow is only required when a page image exists.
    from PIL import Image

    with Image.open(png_path) as img:
        width, height = img.size
    return width, height


def _import_pages(db, project, output_folder):
    """Create one page record per entry of ``full_ocr_results.json``, if present."""
    ocr_path = output_folder / "full_ocr_results.json"
    if not ocr_path.exists():
        return

    ocr = json.loads(ocr_path.read_text(encoding="utf-8"))
    for page_data in ocr.get("pages", []):
        page_num = page_data["page_number"]
        png_path = output_folder / f"page_{page_num:03d}.png"
        # Check the filesystem once and reuse the answer for both the
        # image size and the stored path.
        has_png = png_path.exists()
        width, height = _image_size(png_path) if has_png else (None, None)

        crud.create_page(
            db,
            project_id=project.id,
            page_number=page_num,
            png_path=str(png_path) if has_png else None,
            ocr_data=page_data,
            width=width,
            height=height,
        )
        print(f" [OK] Страница {page_num}: {width}x{height}")


def _import_qc_issues(db, project, output_folder):
    """Create issue records from ``dimension_qc_report.json``, if present."""
    qc_path = output_folder / "dimension_qc_report.json"
    if not qc_path.exists():
        return

    qc = json.loads(qc_path.read_text(encoding="utf-8"))
    total_imported = 0
    # The report groups items under these three top-level keys; the severity
    # that gets stored is the item's own "severity" field, not the group name.
    for bucket in ("errors", "warnings", "infos"):
        for item in qc.get(bucket, []):
            page = crud.get_page_by_number(db, project.id, item["page"])
            # The bbox may be stored under any of these keys; first non-empty wins.
            bbox = item.get("bbox") or item.get("bbox1") or item.get("bbox_dim")
            x1, y1, x2, y2 = _bbox_to_rect(bbox)

            crud.create_issue(
                db,
                project_id=project.id,
                page_id=page.id if page else None,
                issue_type=item["type"],
                severity=item["severity"],
                message=item["message"],
                bbox_x1=x1, bbox_y1=y1, bbox_x2=x2, bbox_y2=y2,
                dimension_text=item.get("text"),
                confidence=item.get("confidence"),
                extra_data={
                    key: value
                    for key, value in item.items()
                    if key not in _ISSUE_CORE_KEYS
                },
            )
            total_imported += 1
    print(f"[OK] Импортировано замечаний: {total_imported}")


def _import_vlm_descriptions(db, project, output_folder):
    """Attach descriptions from ``vlm_descriptions.json`` to the project's pages, if present."""
    vlm_path = output_folder / "vlm_descriptions.json"
    if not vlm_path.exists():
        return

    vlm = json.loads(vlm_path.read_text(encoding="utf-8"))
    descriptions = vlm.get("descriptions", [])
    updated = False
    for page in project.pages:
        # The first description whose "page" equals the page number wins.
        match = next(
            (item for item in descriptions if item.get("page") == page.page_number),
            None,
        )
        if match is not None:
            page.vlm_description = match.get("description")
            updated = True
    if updated:
        # One commit for all pages instead of one commit per matched page.
        db.commit()
    print("[OK] VLM описания загружены")


def import_output_123(output_folder=None):
    """Import an existing output folder into the backend database.

    Creates a project named "Test Project 123", marks it as completed, then
    imports whichever of these files exist in the folder:
    ``full_ocr_results.json`` (pages), ``dimension_qc_report.json`` (QC
    issues) and ``vlm_descriptions.json`` (page descriptions).

    Args:
        output_folder: Folder to import (``str`` or ``Path``). Defaults to
            ``DEFAULT_OUTPUT_FOLDER`` when omitted, which keeps the original
            zero-argument call working.

    Returns:
        The id of the created project, or ``None`` if the folder does not
        exist or the import failed (the error and traceback are printed).
    """
    output_folder = Path(DEFAULT_OUTPUT_FOLDER if output_folder is None else output_folder)
    if not output_folder.exists():
        print(f"[ERR] Не найден: {output_folder}")
        return None

    db = SessionLocal()
    try:
        # Create the project and mark it as already processed.
        project = crud.create_project(
            db,
            pdf_filename="123.pdf",
            name="Test Project 123",
        )
        print(f"[INFO] Создан проект: ID={project.id}")
        crud.update_project_status(db, project.id, "completed", output_folder=str(output_folder))

        _import_pages(db, project, output_folder)
        _import_qc_issues(db, project, output_folder)
        _import_vlm_descriptions(db, project, output_folder)

        print(f"\n[INFO] Проект #{project.id} готов!")
        print(" Dashboard: http://localhost:8001/")
        print(f" Viewer: http://localhost:8001/viewer/{project.id}/5")
        print(f" Review: http://localhost:8001/review.html?project={project.id}")
        return project.id
    except Exception as e:
        # Top-level boundary of a one-off script: report and fall through to
        # return None rather than propagate.
        print(f"[ERR] {e}")
        import traceback
        traceback.print_exc()
        # Discard any uncommitted changes left in the session.
        # NOTE(review): assumes SessionLocal() returns a SQLAlchemy Session - confirm.
        db.rollback()
        return None
    finally:
        db.close()
if __name__ == "__main__":
    # import_output_123() returns the new project id on success and None on
    # failure; reflect that in the process exit status so shell callers can
    # detect a failed import.
    sys.exit(0 if import_output_123() is not None else 1)