2025-04-22 20:21:38 +01:00
|
|
|
|
|
|
|
|
import asyncio
|
2025-07-30 23:18:38 +01:00
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
# Module logger
|
|
|
|
|
# Logger named after this module (__name__); the debug messages emitted by
# the classes in this file all go through it.
logger = logging.getLogger(__name__)
|
2025-04-22 20:21:38 +01:00
|
|
|
|
|
|
|
|
# IRI of the RDF Schema `label` property (rdfs:label).
# NOTE(review): not referenced by the code visible in this section — confirm
# whether it is still used elsewhere before removing.
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
|
|
|
|
|
|
|
|
|
|
class Query:
    """A single document-retrieval request for one user and collection.

    The request embeds the query text through the owning RAG object's
    ``embeddings_client`` and then looks up matching documents through its
    ``doc_embeddings_client``.
    """

    def __init__(
            self, rag, user, collection, verbose,
            doc_limit=20
    ):
        """Store the parameters of the request.

        Args:
            rag: Object providing ``embeddings_client`` and
                ``doc_embeddings_client`` attributes.
            user: Forwarded as ``user`` to the document lookup.
            collection: Forwarded as ``collection`` to the document lookup.
            verbose: When truthy, progress is logged at debug level.
            doc_limit: Forwarded as ``limit`` to the document lookup.
        """
        self.rag, self.user, self.collection = rag, user, collection
        self.verbose, self.doc_limit = verbose, doc_limit

    async def get_vector(self, query):
        """Embed ``query`` and return what the embeddings client produced."""
        if self.verbose:
            logger.debug("Computing embeddings...")

        embedder = self.rag.embeddings_client
        embedded = await embedder.embed(query)

        if self.verbose:
            logger.debug("Embeddings computed")

        return embedded

    async def get_docs(self, query):
        """Return the documents the lookup client finds for ``query``.

        The query is embedded first; the resulting vectors are passed to the
        document embeddings client together with this request's limit, user
        and collection.
        """
        query_vectors = await self.get_vector(query)

        if self.verbose:
            logger.debug("Getting documents...")

        lookup = self.rag.doc_embeddings_client.query
        matches = await lookup(
            query_vectors,
            limit=self.doc_limit,
            user=self.user,
            collection=self.collection,
        )

        if self.verbose:
            logger.debug("Documents:")
            # One debug record per returned document.
            for match in matches:
                logger.debug(f" {match}")

        return matches
|
|
|
|
|
|
|
|
|
|
class DocumentRag:
    """Answer a query by retrieving documents and handing them to a prompt.

    Bundles three clients: one for embeddings, one for document lookup by
    embedding, and one that runs the document prompt.
    """

    def __init__(
            self, prompt_client, embeddings_client, doc_embeddings_client,
            verbose=False,
    ):
        """Keep references to the clients and the verbosity flag.

        Args:
            prompt_client: Object providing an awaitable
                ``document_prompt(query=..., documents=...)``.
            embeddings_client: Used by ``Query`` to embed the query text.
            doc_embeddings_client: Used by ``Query`` to look up documents.
            verbose: When truthy, progress is logged at debug level.
        """
        self.verbose = verbose
        self.prompt_client = prompt_client
        self.embeddings_client = embeddings_client
        self.doc_embeddings_client = doc_embeddings_client

        if not self.verbose:
            return
        logger.debug("DocumentRag initialized")

    async def query(
            self, query, user="trustgraph", collection="default",
            doc_limit=20,
    ):
        """Retrieve documents for ``query`` and run the document prompt.

        Args:
            query: The query passed to retrieval and to the prompt.
            user: Forwarded to the document lookup.
            collection: Forwarded to the document lookup.
            doc_limit: Forwarded to the document lookup as its limit.

        Returns:
            Whatever ``prompt_client.document_prompt`` returns.
        """
        if self.verbose:
            logger.debug("Constructing prompt...")

        # A fresh request object per call; it reaches the clients via `rag`.
        request = Query(
            rag=self,
            user=user,
            collection=collection,
            verbose=self.verbose,
            doc_limit=doc_limit,
        )
        docs = await request.get_docs(query)

        if self.verbose:
            logger.debug("Invoking LLM...")
            logger.debug(f"Documents: {docs}")
            logger.debug(f"Query: {query}")

        answer = await self.prompt_client.document_prompt(
            query=query,
            documents=docs,
        )

        if self.verbose:
            logger.debug("Query processing complete")

        return answer
|
|
|
|
|
|