138 lines
5.2 KiB
Python
138 lines
5.2 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
Импорт существующего output_123 в backend БД.
|
|||
|
|
|
|||
|
|
Использование:
|
|||
|
|
python import_existing.py
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import sys
|
|||
|
|
import json
|
|||
|
|
from pathlib import Path
|
|||
|
|
|
|||
|
|
# Add the backend directory to sys.path so the `app` package can be imported
|
|||
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "backend"))
|
|||
|
|
|
|||
|
|
from app.database import SessionLocal, Base, engine
|
|||
|
|
from app import models, crud, schemas
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Folder imported when the caller does not supply one (the original hard-coded location).
_DEFAULT_OUTPUT_FOLDER = Path("/Users/kirillblinov/development/opencode/OCR/opencode/output_123")

# QC item keys that are mapped onto dedicated issue fields (or deliberately
# dropped) and therefore must not be repeated inside ``extra_data``.
_ISSUE_CORE_KEYS = frozenset({
    "type", "severity", "message", "page", "text", "confidence",
    "bbox", "bbox1", "bbox2", "bbox_dim",
})


def _bbox_bounds(item):
    """Return ``(x1, y1, x2, y2)`` for a QC item, or four ``None`` values if it has no bbox.

    The first non-empty value among the ``bbox``, ``bbox1`` and ``bbox_dim``
    keys is used.  A list of points is reduced to its axis-aligned bounding
    rectangle; a flat sequence is read as its first four elements.
    """
    bbox = item.get("bbox") or item.get("bbox1") or item.get("bbox_dim")
    if not bbox:
        return None, None, None, None
    if isinstance(bbox[0], list):
        xs = [point[0] for point in bbox]
        ys = [point[1] for point in bbox]
        return min(xs), min(ys), max(xs), max(ys)
    return bbox[0], bbox[1], bbox[2], bbox[3]


def _import_pages(db, project, output_folder):
    """Create one page record per entry of ``full_ocr_results.json``, if that file exists."""
    ocr_path = output_folder / "full_ocr_results.json"
    if not ocr_path.exists():
        return
    ocr = json.loads(ocr_path.read_text(encoding="utf-8"))

    # Imported once here rather than on every loop iteration; kept local so the
    # script still works without Pillow when there is no OCR result file.
    from PIL import Image

    for page_data in ocr.get("pages", []):
        page_num = page_data["page_number"]
        png_path = output_folder / f"page_{page_num:03d}.png"
        has_png = png_path.exists()

        # Page dimensions are only known when the rendered PNG is present.
        width = height = None
        if has_png:
            with Image.open(png_path) as img:
                width, height = img.size

        crud.create_page(
            db,
            project_id=project.id,
            page_number=page_num,
            png_path=str(png_path) if has_png else None,
            ocr_data=page_data,
            width=width,
            height=height,
        )
        print(f" [OK] Страница {page_num}: {width}x{height}")


def _import_issues(db, project, output_folder):
    """Create one issue record per entry of ``dimension_qc_report.json``, if that file exists."""
    qc_path = output_folder / "dimension_qc_report.json"
    if not qc_path.exists():
        return
    qc = json.loads(qc_path.read_text(encoding="utf-8"))

    total_imported = 0
    for severity in ("errors", "warnings", "infos"):
        for item in qc.get(severity, []):
            # An issue may reference a page that was never imported; it is
            # then stored without a page link.
            page = crud.get_page_by_number(db, project.id, item["page"])
            x1, y1, x2, y2 = _bbox_bounds(item)

            crud.create_issue(
                db,
                project_id=project.id,
                page_id=page.id if page else None,
                issue_type=item["type"],
                severity=item["severity"],
                message=item["message"],
                bbox_x1=x1, bbox_y1=y1, bbox_x2=x2, bbox_y2=y2,
                dimension_text=item.get("text"),
                confidence=item.get("confidence"),
                extra_data={k: v for k, v in item.items() if k not in _ISSUE_CORE_KEYS},
            )
            total_imported += 1

    print(f"[OK] Импортировано замечаний: {total_imported}")


def _import_vlm_descriptions(db, project, output_folder):
    """Attach descriptions from ``vlm_descriptions.json`` to the project's pages, if that file exists."""
    vlm_path = output_folder / "vlm_descriptions.json"
    if not vlm_path.exists():
        return
    vlm = json.loads(vlm_path.read_text(encoding="utf-8"))

    # Index by page number, keeping the FIRST entry for each page, so every
    # page is resolved with one lookup instead of a scan over all entries.
    first_by_page = {}
    for item in vlm.get("descriptions", []):
        first_by_page.setdefault(item.get("page"), item)

    updated = False
    for page in project.pages:
        item = first_by_page.get(page.page_number)
        if item is not None:
            page.vlm_description = item.get("description")
            updated = True

    # One commit for all pages instead of one per page.
    if updated:
        db.commit()
    print("[OK] VLM описания загружены")


def import_output_123(output_folder=None):
    """Import an existing ``output_123`` result folder into the backend database.

    A project named "Test Project 123" is created and marked "completed";
    pages, QC issues and VLM descriptions are then loaded from the JSON files
    found in the folder.  Each of those files is optional.

    Args:
        output_folder: Folder to import (``str`` or ``Path``).  Defaults to the
            original hard-coded location when omitted.

    Returns:
        The id of the created project, or ``None`` when the folder does not
        exist or the import failed (the error and traceback are printed).
    """
    output_folder = Path(_DEFAULT_OUTPUT_FOLDER if output_folder is None else output_folder)
    if not output_folder.exists():
        print(f"[ERR] Не найден: {output_folder}")
        return None

    db = SessionLocal()
    try:
        project = crud.create_project(
            db,
            pdf_filename="123.pdf",
            name="Test Project 123",
        )
        print(f"[INFO] Создан проект: ID={project.id}")

        crud.update_project_status(db, project.id, "completed", output_folder=str(output_folder))

        # Pages must be imported before issues: issues are linked to pages by number.
        _import_pages(db, project, output_folder)
        _import_issues(db, project, output_folder)
        _import_vlm_descriptions(db, project, output_folder)

        print(f"\n[INFO] Проект #{project.id} готов!")
        print(" Dashboard: http://localhost:8001/")
        print(f" Viewer: http://localhost:8001/viewer/{project.id}/5")
        print(f" Review: http://localhost:8001/review.html?project={project.id}")

        return project.id

    except Exception as e:
        # Best-effort script: report the failure instead of propagating it.
        print(f"[ERR] {e}")
        import traceback
        traceback.print_exc()
        return None
    finally:
        db.close()
|
|||
|
|
|
|||
|
|
|
|||
|
|
if __name__ == "__main__":
    # import_output_123() returns None when the folder is missing or the import
    # failed; surface that to the calling shell as a non-zero exit status
    # instead of always exiting with 0.
    sys.exit(0 if import_output_123() is not None else 1)
|