opencode/rag_query.py
Кирилл Блинов 95093736da Add dimension QC, DZI generator, web viewer, and fix RAG query bug
- dimension_qc_checker.py: rules-based QC for dimension chains, overlaps, crowding
- generate_dzi.py: Deep Zoom Image tile pyramid generator for OpenSeadragon
- generate_web_viewer.py: OpenSeadragon viewer with SVG overlays and issue feedback buttons
- rag_query.py: fix LightRAG remove_think_tags crash on None response from LLM
- .gitignore: add *.pdf, *.db, backend/uploads/, backend/outputs/
2026-06-01 12:30:07 +03:00

196 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Запросы к LightRAG индексу по чертежам.
Использование:
python rag_query.py <output_folder> "Какие размеры между осями А и Б?"
Режимы поиска:
naive — прямой поиск по чанкам (без графа)
local — поиск по локальному графу соседей сущностей
global — поиск по глобальному контексту проекта
hybrid — комбинация local + global (рекомендуется для чертежей)
"""
import os
import sys
import asyncio
import argparse
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
try:
from lightrag import LightRAG, QueryParam
from lightrag.utils import EmbeddingFunc
except ImportError as e:
print(f"[ERR] LightRAG не установлен: {e}")
sys.exit(1)
def get_openai_backend(model: str = "gpt-4o-mini"):
    """Build the LLM and embedding callables backed by LightRAG's OpenAI helpers.

    Args:
        model: Chat-completion model name passed as the first argument to
            ``openai_complete_if_cache``.

    Returns:
        A ``(llm_func, embedding_func)`` tuple: an async completion callable
        and an ``EmbeddingFunc`` configured for ``text-embedding-3-small``
        (``embedding_dim=1536``, ``max_token_size=8192``).

    Raises:
        ImportError: If the helpers cannot be imported from either module
            layout tried below.
    """
    # Newer LightRAG releases ship the provider helpers as
    # ``lightrag.llm.<provider>`` and name the embedding helper
    # ``openai_embed``; older ones expose everything from a flat
    # ``lightrag.llm`` module. Try the new layout first, then fall back.
    try:
        from lightrag.llm.openai import openai_complete_if_cache, openai_embed
    except ImportError:
        from lightrag.llm import openai_complete_if_cache
        from lightrag.llm import openai_embedding as openai_embed

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # ``None`` sentinel instead of a mutable default list shared by all calls.
        return await openai_complete_if_cache(
            model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages or [],
            **kwargs,
        )

    async def embed_func(texts: list[str]):
        return await openai_embed(texts, model="text-embedding-3-small")

    return llm_func, EmbeddingFunc(embedding_dim=1536, max_token_size=8192, func=embed_func)
def get_ollama_backend(model: str = "qwen2.5:14b", embed_model: str = "nomic-embed-text"):
    """Build the LLM and embedding callables backed by LightRAG's Ollama helpers.

    Args:
        model: Name of the Ollama chat model used for completions.
        embed_model: Name of the Ollama embedding model.

    Returns:
        A ``(llm_func, embedding_func)`` tuple: an async completion callable
        and an ``EmbeddingFunc`` declared with ``embedding_dim=768`` and
        ``max_token_size=8192``.

    Raises:
        ImportError: If the helpers cannot be imported from either module
            layout tried below.
    """
    # ``ollama_model_complete`` takes the *prompt* as its first positional
    # argument and reads the model name from LightRAG's global config, so it
    # cannot be handed ``model`` positionally. The lower-level helper accepts
    # the model explicitly. It is named ``_ollama_model_if_cache`` in the
    # package layout and ``ollama_model_if_cache`` in the legacy flat module.
    # NOTE(review): relies on LightRAG internals -- confirm against the
    # installed LightRAG version.
    try:
        from lightrag.llm.ollama import _ollama_model_if_cache as ollama_complete
        from lightrag.llm.ollama import ollama_embed
    except ImportError:
        from lightrag.llm import ollama_model_if_cache as ollama_complete
        from lightrag.llm import ollama_embedding as ollama_embed

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # Mirror what ``ollama_model_complete`` does with this flag: it is not
        # a valid Ollama chat option, it only requests JSON-formatted output.
        if kwargs.pop("keyword_extraction", None):
            kwargs["format"] = "json"
        # ``None`` sentinel instead of a mutable default list shared by all calls.
        return await ollama_complete(
            model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages or [],
            **kwargs,
        )

    async def embed_func(texts: list[str]):
        # The helper's parameter is ``embed_model``; passing ``model=`` would
        # not select the embedding model.
        return await ollama_embed(texts, embed_model=embed_model)

    return llm_func, EmbeddingFunc(embedding_dim=768, max_token_size=8192, func=embed_func)
# Local sentence-transformers model used for embeddings by both
# OpenAI-compatible backends below.
_LOCAL_EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"


def _build_openai_compatible_backend(base_url: str, api_key: str, model: str):
    """Create LLM/embedding callables for an OpenAI-compatible chat endpoint.

    Shared implementation for the LM Studio and OpenCode backends, which
    differ only in where the endpoint URL and API key come from.

    Args:
        base_url: Base URL handed to ``AsyncOpenAI``.
        api_key: API key handed to ``AsyncOpenAI``.
        model: Model name sent with every chat-completion request.

    Returns:
        A ``(llm_func, embedding_func)`` tuple. ``embedding_func`` is an
        ``EmbeddingFunc`` with ``embedding_dim=384`` and ``max_token_size=512``
        whose vectors come from a locally loaded ``SentenceTransformer``.
    """
    from openai import AsyncOpenAI
    from sentence_transformers import SentenceTransformer

    client = AsyncOpenAI(base_url=base_url, api_key=api_key)
    encoder = SentenceTransformer(_LOCAL_EMBEDDING_MODEL)

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # ``None`` sentinel instead of a mutable default list shared by all calls.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        if history_messages:
            messages.extend(history_messages)
        messages.append({"role": "user", "content": prompt})

        # Only ``temperature`` and ``max_tokens`` are taken from kwargs; any
        # other keyword arguments supplied by the caller are ignored.
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=kwargs.get("temperature", 0.3),
            max_tokens=kwargs.get("max_tokens", 1024),
        )
        # Always return a string: an empty ``choices`` list or a ``None``
        # message content would otherwise surface as IndexError / None in
        # the caller's post-processing.
        if not response.choices:
            return ""
        content = response.choices[0].message.content
        return content if content is not None else ""

    async def embed_func(texts: list[str]):
        # ``convert_to_numpy=True`` makes ``encode`` return a NumPy array.
        return encoder.encode(texts, convert_to_numpy=True)

    return llm_func, EmbeddingFunc(embedding_dim=384, max_token_size=512, func=embed_func)


def get_lmstudio_backend(model: str = "qwen2.5:14b"):
    """Build the LM Studio backend.

    The endpoint is read from ``LMSTUDIO_URL`` (default
    ``http://127.0.0.1:1234/v1``) and the key from ``LMSTUDIO_API_KEY``
    (default ``lm-studio``).

    Args:
        model: Model name sent with every chat-completion request.

    Returns:
        A ``(llm_func, embedding_func)`` tuple.
    """
    return _build_openai_compatible_backend(
        base_url=os.environ.get("LMSTUDIO_URL", "http://127.0.0.1:1234/v1"),
        api_key=os.environ.get("LMSTUDIO_API_KEY", "lm-studio"),
        model=model,
    )


def get_opencode_backend(model: str = "nemotron-3-super-free"):
    """Build the OpenCode backend.

    The endpoint is read from ``OPENCODE_URL`` (default
    ``https://opencode.ai/zen/v1``) and the key from ``OPENCODE_API_KEY``
    (default: empty string).

    Args:
        model: Model name sent with every chat-completion request.

    Returns:
        A ``(llm_func, embedding_func)`` tuple.
    """
    return _build_openai_compatible_backend(
        base_url=os.environ.get("OPENCODE_URL", "https://opencode.ai/zen/v1"),
        api_key=os.environ.get("OPENCODE_API_KEY", ""),
        model=model,
    )
async def query_index(folder: Path, question: str, mode: str, backend: str, model: str, embed_model: str):
    """Run one question against the LightRAG index stored under *folder*.

    Args:
        folder: Directory that contains the ``lightrag_cache`` sub-directory.
        question: Natural-language question passed to ``rag.aquery``.
        mode: Value passed to ``QueryParam(mode=...)``.
        backend: One of ``openai``, ``ollama``, ``lmstudio``, ``opencode``.
        model: LLM model name handed to the selected backend factory.
        embed_model: Embedding model name; only the ``ollama`` backend uses it.

    Raises:
        SystemExit: With code 1 when ``lightrag_cache`` is missing or the
            backend name is not recognised.
    """
    cache_dir = folder / "lightrag_cache"
    if not cache_dir.exists():
        print(f"[ERR] Не найден индекс {cache_dir}. Сначала запустите rag_indexer.py")
        sys.exit(1)

    print(f"[INIT] Загрузка индекса из {cache_dir} (backend={backend}, model={model})")

    # Lambdas keep the lookup lazy: only the selected factory is resolved and
    # called, so an unknown backend is rejected before any backend is built.
    backend_factories = {
        "openai": lambda: get_openai_backend(model),
        "ollama": lambda: get_ollama_backend(model, embed_model),
        "lmstudio": lambda: get_lmstudio_backend(model),
        "opencode": lambda: get_opencode_backend(model),
    }
    factory = backend_factories.get(backend)
    if factory is None:
        print(f"[ERR] Неизвестный backend: {backend}")
        sys.exit(1)
    llm_func, embed_cfg = factory()

    rag = LightRAG(
        working_dir=str(cache_dir),
        llm_model_func=llm_func,
        embedding_func=embed_cfg,
    )

    print("[INIT] Инициализация хранилищ...")
    await rag.initialize_storages()
    try:
        print(f"[QUERY] Режим: {mode}")
        print(f"[QUERY] Вопрос: {question}\n")
        print("=" * 60)

        result = await rag.aquery(question, param=QueryParam(mode=mode))

        print("=" * 60)
        print("\n[RESULT] Ответ:")
        print(result)
        print("")
    finally:
        # Release the storages opened by initialize_storages() even when the
        # query itself fails.
        await rag.finalize_storages()
def main(argv=None):
    """Parse command-line arguments and run a single query.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse on invalid arguments.
    """
    # Model used for each backend when --model is not given. Keeping this in
    # a table (with --model defaulting to None) means an explicitly requested
    # "gpt-4o-mini" is no longer mistaken for "not specified" and replaced.
    default_models = {
        "openai": "gpt-4o-mini",
        "ollama": "qwen2.5:14b",
        "lmstudio": "qwen2.5:14b",
        "opencode": "mimo-v2.5-free",
    }

    parser = argparse.ArgumentParser(description="Запросы к LightRAG по чертежам")
    parser.add_argument("folder", help="Папка с lightrag_cache")
    parser.add_argument("question", help="Вопрос на естественном языке")
    parser.add_argument("--mode", choices=["naive", "local", "global", "hybrid"],
                        default="hybrid", help="Режим поиска (default: hybrid)")
    parser.add_argument("--backend", choices=list(default_models), default="openai")
    parser.add_argument("--model", default=None,
                        help="Модель LLM (default: зависит от backend)")
    parser.add_argument("--embed-model", default="nomic-embed-text")
    args = parser.parse_args(argv)

    model = args.model or default_models[args.backend]

    asyncio.run(
        query_index(
            Path(args.folder),
            args.question,
            args.mode,
            args.backend,
            model,
            args.embed_model,
        )
    )


if __name__ == "__main__":
    main()