opencode/rag_query.py

194 lines
7.3 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Query a LightRAG index built from drawings.

Usage:
    python rag_query.py <output_folder> "Какие размеры между осями А и Б?"

Search modes:
    naive   - direct search over chunks (no graph)
    local   - search over the local graph of neighbouring entities
    global  - search over the global project context
    hybrid  - combination of local + global (recommended for drawings)
"""
import os
import sys
import asyncio
import argparse
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()
# LightRAG is required by everything below: if it cannot be imported, report
# the reason and stop the process with exit status 1.
try:
    from lightrag import LightRAG, QueryParam
    from lightrag.utils import EmbeddingFunc
except ImportError as import_error:
    print(f"[ERR] LightRAG не установлен: {import_error}")
    raise SystemExit(1)
def get_openai_backend(model: str = "gpt-4o-mini"):
    """Build the LLM and embedding callables for the OpenAI backend.

    Args:
        model: Chat model name, passed as the first argument to
            ``openai_complete_if_cache``.

    Returns:
        A ``(llm_func, embedding_func)`` tuple. ``llm_func`` is an async
        completion callable; ``embedding_func`` is an ``EmbeddingFunc``
        declared with 1536 dimensions and an 8192-token limit that embeds
        with the ``text-embedding-3-small`` model.

    Raises:
        ImportError: If neither import layout provides the OpenAI helpers.
    """
    try:
        from lightrag.llm import openai_complete_if_cache, openai_embedding
    except ImportError:
        # NOTE(review): recent LightRAG releases appear to ship per-provider
        # modules and to have renamed the embedding helper to ``openai_embed``;
        # confirm against the installed version.
        from lightrag.llm.openai import openai_complete_if_cache
        from lightrag.llm.openai import openai_embed as openai_embedding

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # ``None`` instead of ``[]`` as the default: a list default would be a
        # single object shared by every call.
        return await openai_complete_if_cache(
            model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages or [],
            **kwargs,
        )

    async def embed_func(texts: list[str]):
        # Returns whatever the LightRAG helper returns (not annotated here
        # because the concrete type is defined by the library).
        return await openai_embedding(texts, model="text-embedding-3-small")

    return llm_func, EmbeddingFunc(embedding_dim=1536, max_token_size=8192, func=embed_func)
def get_ollama_backend(
    model: str = "qwen2.5:14b",
    embed_model: str = "nomic-embed-text",
    embed_dim: int = 768,
):
    """Build the LLM and embedding callables for the Ollama backend.

    Args:
        model: Chat model name, passed as the first argument to
            ``ollama_model_complete``.
        embed_model: Name of the embedding model handed to the LightRAG
            Ollama embedding helper.
        embed_dim: Vector size declared to LightRAG. Defaults to 768, the
            value that was previously hard-coded; pass a different value when
            ``embed_model`` produces vectors of another size.

    Returns:
        A ``(llm_func, embedding_func)`` tuple. ``embedding_func`` is an
        ``EmbeddingFunc`` declared with ``embed_dim`` dimensions and an
        8192-token limit.

    Raises:
        ImportError: If neither import layout provides the Ollama helpers.
    """
    try:
        from lightrag.llm import ollama_model_complete, ollama_embedding
    except ImportError:
        # NOTE(review): recent LightRAG releases appear to ship per-provider
        # modules and to have renamed the embedding helper to ``ollama_embed``;
        # confirm against the installed version.
        from lightrag.llm.ollama import ollama_model_complete
        from lightrag.llm.ollama import ollama_embed as ollama_embedding

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # ``None`` instead of ``[]`` as the default: a list default would be a
        # single object shared by every call.
        # NOTE(review): ``ollama_model_complete`` may take ``prompt`` as its
        # first positional argument and read the model name from LightRAG's
        # own configuration; if so, passing ``model`` first is wrong - verify
        # against the installed LightRAG before relying on this backend.
        return await ollama_model_complete(
            model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages or [],
            **kwargs,
        )

    async def embed_func(texts: list[str]):
        # The model name is passed positionally so the call does not depend on
        # the helper's keyword name (it appears to be ``embed_model`` rather
        # than ``model`` - TODO confirm).
        return await ollama_embedding(texts, embed_model)

    return llm_func, EmbeddingFunc(embedding_dim=embed_dim, max_token_size=8192, func=embed_func)
def get_lmstudio_backend(model: str = "qwen2.5:14b"):
    """Build the LLM and embedding callables for an LM Studio server.

    The server is reached through the ``openai`` async client. Its address is
    read from ``LMSTUDIO_URL`` (default ``http://127.0.0.1:1234/v1``) and the
    key from ``LMSTUDIO_API_KEY`` (default ``lm-studio``). Embeddings are
    computed in-process with a SentenceTransformer model.

    Args:
        model: Model name sent with every chat completion request.

    Returns:
        A ``(llm_func, embedding_func)`` tuple. ``embedding_func`` is an
        ``EmbeddingFunc`` declared with 384 dimensions and a 512-token limit.
    """
    from openai import AsyncOpenAI
    from sentence_transformers import SentenceTransformer

    base_url = os.environ.get("LMSTUDIO_URL", "http://127.0.0.1:1234/v1")
    api_key = os.environ.get("LMSTUDIO_API_KEY", "lm-studio")
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # Message order: optional system prompt, prior history, then the
        # current user prompt.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        if history_messages:
            messages.extend(history_messages)
        messages.append({"role": "user", "content": prompt})
        # Only ``temperature`` and ``max_tokens`` are taken from kwargs; every
        # other keyword argument is ignored.
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=kwargs.get("temperature", 0.3),
            max_tokens=kwargs.get("max_tokens", 1024),
        )
        return response.choices[0].message.content

    # Loaded once here and reused by every embed_func call.
    embed_model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

    async def embed_func(texts: list[str]):
        # ``convert_to_numpy=True`` asks for a NumPy result, so no list
        # return annotation is declared. The call is synchronous.
        return embed_model.encode(texts, convert_to_numpy=True)

    return llm_func, EmbeddingFunc(embedding_dim=384, max_token_size=512, func=embed_func)
def get_opencode_backend(model: str = "nemotron-3-super-free"):
    """Build the LLM and embedding callables for the OpenCode endpoint.

    The endpoint is reached through the ``openai`` async client. Its address
    is read from ``OPENCODE_URL`` (default ``https://opencode.ai/zen/v1``) and
    the key from ``OPENCODE_API_KEY`` (default: empty string). Embeddings are
    computed in-process with a SentenceTransformer model.

    Args:
        model: Model name sent with every chat completion request.

    Returns:
        A ``(llm_func, embedding_func)`` tuple. ``embedding_func`` is an
        ``EmbeddingFunc`` declared with 384 dimensions and a 512-token limit.
    """
    from openai import AsyncOpenAI
    from sentence_transformers import SentenceTransformer

    base_url = os.environ.get("OPENCODE_URL", "https://opencode.ai/zen/v1")
    # NOTE(review): an unset OPENCODE_API_KEY yields an empty key; whether the
    # endpoint accepts that is not visible from this file.
    api_key = os.environ.get("OPENCODE_API_KEY", "")
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    async def llm_func(prompt, system_prompt=None, history_messages=None, **kwargs):
        # Message order: optional system prompt, prior history, then the
        # current user prompt.
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        if history_messages:
            messages.extend(history_messages)
        messages.append({"role": "user", "content": prompt})
        # Only ``temperature`` and ``max_tokens`` are taken from kwargs; every
        # other keyword argument is ignored.
        response = await client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=kwargs.get("temperature", 0.3),
            max_tokens=kwargs.get("max_tokens", 1024),
        )
        return response.choices[0].message.content

    # Loaded once here and reused by every embed_func call.
    embed_model = SentenceTransformer("sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")

    async def embed_func(texts: list[str]):
        # ``convert_to_numpy=True`` asks for a NumPy result, so no list
        # return annotation is declared. The call is synchronous.
        return embed_model.encode(texts, convert_to_numpy=True)

    return llm_func, EmbeddingFunc(embedding_dim=384, max_token_size=512, func=embed_func)
async def query_index(folder: Path, question: str, mode: str, backend: str, model: str, embed_model: str):
    """Open the LightRAG index under *folder*, run one query and print the answer.

    Args:
        folder: Directory expected to contain a ``lightrag_cache`` subdirectory.
        question: Query text passed to ``rag.aquery``.
        mode: Value passed as ``QueryParam(mode=...)``.
        backend: One of ``openai``, ``ollama``, ``lmstudio``, ``opencode``.
        model: Model name handed to the selected backend factory.
        embed_model: Embedding model name; only the ``ollama`` backend
            receives it.

    The process exits with status 1 when the cache directory does not exist
    or when *backend* is not one of the known names.
    """
    index_dir = folder / "lightrag_cache"
    if not index_dir.exists():
        print(f"[ERR] Не найден индекс {index_dir}. Сначала запустите rag_indexer.py")
        sys.exit(1)

    print(f"[INIT] Загрузка индекса из {index_dir} (backend={backend}, model={model})")

    # Deferred factories: only the selected backend is actually constructed.
    backend_factories = {
        "openai": lambda: get_openai_backend(model),
        "ollama": lambda: get_ollama_backend(model, embed_model),
        "lmstudio": lambda: get_lmstudio_backend(model),
        "opencode": lambda: get_opencode_backend(model),
    }
    make_backend = backend_factories.get(backend)
    if make_backend is None:
        print(f"[ERR] Неизвестный backend: {backend}")
        sys.exit(1)
    completion_func, embedding_cfg = make_backend()

    rag = LightRAG(
        working_dir=str(index_dir),
        llm_model_func=completion_func,
        embedding_func=embedding_cfg,
    )
    print("[INIT] Инициализация хранилищ...")
    await rag.initialize_storages()

    separator = "=" * 60
    print(f"[QUERY] Режим: {mode}")
    print(f"[QUERY] Вопрос: {question}\n")
    print(separator)
    answer = await rag.aquery(question, param=QueryParam(mode=mode))
    print(separator)
    print("\n[RESULT] Ответ:")
    print(answer)
    print("")
def main():
    """Parse the command line and run a single query against a LightRAG index.

    Positional arguments are the folder containing ``lightrag_cache`` and the
    question. ``--mode`` selects the search mode (default ``hybrid``),
    ``--backend`` the LLM backend (default ``openai``), ``--model`` the chat
    model and ``--embed-model`` the embedding model name.

    When ``--model`` is omitted, a per-backend default is used. A model given
    explicitly is always passed through unchanged, including ``gpt-4o-mini``
    on a non-OpenAI backend.
    """
    # Model used for each backend when --model is not supplied. The key order
    # also defines the order of the --backend choices.
    default_models = {
        "openai": "gpt-4o-mini",
        "ollama": "qwen2.5:14b",
        "lmstudio": "qwen2.5:14b",
        "opencode": "mimo-v2.5-free",
    }

    parser = argparse.ArgumentParser(description="Запросы к LightRAG по чертежам")
    parser.add_argument("folder", help="Папка с lightrag_cache")
    parser.add_argument("question", help="Вопрос на естественном языке")
    parser.add_argument(
        "--mode",
        choices=["naive", "local", "global", "hybrid"],
        default="hybrid",
        help="Режим поиска (default: hybrid)",
    )
    parser.add_argument("--backend", choices=list(default_models), default="openai")
    # None means "not given", so an explicit value is never mistaken for the
    # default and overridden.
    parser.add_argument("--model", default=None)
    parser.add_argument("--embed-model", default="nomic-embed-text")
    args = parser.parse_args()

    model = args.model if args.model is not None else default_models[args.backend]
    asyncio.run(
        query_index(
            Path(args.folder),
            args.question,
            args.mode,
            args.backend,
            model,
            args.embed_model,
        )
    )


if __name__ == "__main__":
    main()