140 lines
4.4 KiB
Python
140 lines
4.4 KiB
Python
|
|
#!/usr/bin/env python3
|
|||
|
|
# -*- coding: utf-8 -*-
|
|||
|
|
"""
|
|||
|
|
Тест Alibaba Cloud DashScope qwen-vl-ocr на чертеже.
|
|||
|
|
|
|||
|
|
Использование:
|
|||
|
|
python test_qwen_ocr.py <png_file> [--describe]
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
import os
|
|||
|
|
import sys
|
|||
|
|
import base64
|
|||
|
|
import json
|
|||
|
|
from pathlib import Path
|
|||
|
|
from openai import OpenAI
|
|||
|
|
|
|||
|
|
# Load the API key from a .env file located next to this script.
env_path = Path(__file__).parent / ".env"
API_KEY = None
if env_path.exists():
    # "utf-8-sig" tolerates a leading BOM, which would otherwise make the
    # startswith() check miss a key placed on the first line.
    # errors="replace" keeps undecodable bytes on unrelated lines from
    # aborting the import.
    env_text = env_path.read_text(encoding="utf-8-sig", errors="replace")
    for line in env_text.splitlines():
        if not line.startswith("DASHSCOPE_API_KEY="):
            continue
        # Accept KEY=value as well as KEY="value" / KEY='value'.
        value = line.split("=", 1)[1].strip().strip("\"'")
        if value:
            API_KEY = value
            # Export the key only when it is non-empty, so a blank
            # "DASHSCOPE_API_KEY=" line cannot clobber a real exported value.
            os.environ["DASHSCOPE_API_KEY"] = API_KEY
            break

# Fall back to the process environment when .env is absent or has no key.
if not API_KEY:
    API_KEY = os.environ.get("DASHSCOPE_API_KEY")

# OpenAI-compatible endpoint and model name passed to the client below.
BASE_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
MODEL = "qwen-vl-ocr"
|
|||
|
|
|
|||
|
|
|
|||
|
|
def encode_image(image_path: Path) -> str:
    """Return the contents of the file at *image_path* as base64 text."""
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def test_ocr(image_path: Path):
    """Send an image to the OCR model and save the raw reply next to it.

    The prompt asks the model to list every text element with its bounding
    box and confidence as a JSON array. The first 2000 characters of the
    reply are printed, and the full reply is written verbatim (it is not
    validated as JSON) to ``qwen_ocr_result_<image stem>.json`` in the
    image's directory.

    Args:
        image_path: Path to the image file to recognise.

    Raises:
        RuntimeError: If no DashScope API key is configured.
    """
    import mimetypes

    # Fail before building the client: with api_key=None the OpenAI SDK falls
    # back to OPENAI_API_KEY, which would then be sent to the DashScope
    # endpoint instead of a DashScope key.
    if not API_KEY:
        raise RuntimeError(
            "DASHSCOPE_API_KEY is not set "
            "(put it in .env next to the script or export it)"
        )

    client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

    b64 = encode_image(image_path)
    # Label the data URL with the real image type; PNG remains the default
    # when the extension is unknown or is not an image type.
    mime_type, _ = mimetypes.guess_type(image_path.name)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"
    data_url = f"data:{mime_type};base64,{b64}"

    print(f"Отправляем {image_path.name} в qwen-vl-ocr...")
    print(f"Размер файла: {image_path.stat().st_size / 1024 / 1024:.1f} MB")

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Распознай все текстовые элементы на этом чертеже. "
                            "Для каждого текста укажи:\n"
                            "- сам текст\n"
                            "- координаты bbox (x1,y1,x2,y2)\n"
                            "- confidence (если доступен)\n"
                            "Ответь в формате JSON-массива."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        temperature=0.1,
        max_tokens=2048,
    )

    # The SDK types message.content as optional; normalise None to "" so the
    # slice and the file write below cannot raise TypeError.
    raw = response.choices[0].message.content or ""
    print("\n=== ОТВЕТ МОДЕЛИ ===")
    print(raw[:2000])
    print("=" * 50)

    # Save the full reply next to the source image.
    out_path = image_path.parent / f"qwen_ocr_result_{image_path.stem}.json"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(raw)
    print(f"\n[OK] Сохранено: {out_path}")
|
|||
|
|
|
|||
|
|
|
|||
|
|
def describe_image(image_path: Path):
    """Ask the model for a free-form description of what it sees in the image.

    The reply is printed and returned.

    Args:
        image_path: Path to the image file to describe.

    Returns:
        The model's reply text; an empty string when the reply has no content.

    Raises:
        RuntimeError: If no DashScope API key is configured.
    """
    import mimetypes

    # Fail before building the client: with api_key=None the OpenAI SDK falls
    # back to OPENAI_API_KEY, which would then be sent to the DashScope
    # endpoint instead of a DashScope key.
    if not API_KEY:
        raise RuntimeError(
            "DASHSCOPE_API_KEY is not set "
            "(put it in .env next to the script or export it)"
        )

    client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

    b64 = encode_image(image_path)
    # Label the data URL with the real image type; PNG remains the default
    # when the extension is unknown or is not an image type.
    mime_type, _ = mimetypes.guess_type(image_path.name)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"
    data_url = f"data:{mime_type};base64,{b64}"

    print(f"\nОтправляем {image_path.name} на описание...")

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Опиши подробно, что ты видишь на этом изображении. "
                            "Чертеж здания или что-то другое? Какие элементы видны? "
                            "Размеры, текст, линии, оси — всё, что различимо."
                        ),
                    },
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        temperature=0.3,
        max_tokens=1024,
    )

    # The SDK types message.content as optional; normalise None to "" so the
    # caller always receives a string and "None" is never printed.
    desc = response.choices[0].message.content or ""
    print("\n=== ОПИСАНИЕ ===")
    print(desc)
    print("=" * 50)
    return desc
|
|||
|
|
|
|||
|
|
|
|||
|
|
def main():
    """Command-line entry point.

    Usage: python test_qwen_ocr.py <png_file> [--describe]

    Runs ``describe_image`` when ``--describe`` is given and ``test_ocr``
    otherwise. Prints a message and exits with status 1 when no file
    argument is given or the path is not an existing file.
    """
    args = sys.argv[1:]
    # Separate flags from positionals so "--describe" may be given before or
    # after the file name.
    positional = [arg for arg in args if not arg.startswith("--")]
    if not positional:
        print("Usage: python test_qwen_ocr.py <png_file> [--describe]")
        sys.exit(1)

    image_path = Path(positional[0])
    # is_file() also rejects directories, which would otherwise fail later
    # when the image is opened for reading.
    if not image_path.is_file():
        print(f"[ERR] Файл не найден: {image_path}")
        sys.exit(1)

    if "--describe" in args:
        describe_image(image_path)
    else:
        test_ocr(image_path)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|