mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-27 17:36:23 +02:00
Fix/document embeddings (#247)
* Update schema for doc embeddings
* Rename embeddings-vectorize to graph-embeddings
* Added document-embeddings processor (broken, needs fixing)
* Added scripts
* Fixed DE queue schema
* Add missing DE process
* Fix doc RAG processing, put graph-rag and doc-rag in appropriate component files.
This commit is contained in:
parent
c633652fd2
commit
6aa212061d
22 changed files with 421 additions and 189 deletions
|
|
@ -131,6 +131,35 @@ class Api:
|
|||
except:
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def document_rag(self, question):
    """Send a question to the document-rag endpoint and return the answer.

    Posts ``{"query": question}`` as JSON to ``f"{self.url}document-rag"``
    and returns the ``"response"`` field of the JSON reply.

    Args:
        question: The question; sent as the ``query`` field of the request.

    Returns:
        The value stored under ``"response"`` in the decoded reply.

    Raises:
        ProtocolException: If the HTTP status code is not 200, the body
            cannot be decoded as JSON, or the decoded reply has no
            ``"response"`` field.

    Anything raised by ``self.check_error`` propagates unchanged.
    """

    # The input consists of a question
    payload = {"query": question}

    url = f"{self.url}document-rag"

    # Invoke the API, input is passed as JSON
    resp = requests.post(url, json=payload)

    # Should be a 200 status code
    if resp.status_code != 200:
        raise ProtocolException(f"Status code {resp.status_code}")

    try:
        # Parse the response as JSON
        reply = resp.json()
    except ValueError as e:
        # requests reports an undecodable body with a ValueError subclass;
        # catching only that keeps KeyboardInterrupt and the like intact.
        raise ProtocolException("Expected JSON response") from e

    # NOTE(review): check_error is given the raw HTTP response rather than
    # the decoded JSON in `reply` -- confirm which one it expects.
    self.check_error(resp)

    try:
        return reply["response"]
    except (KeyError, TypeError) as e:
        # KeyError: field missing; TypeError: reply is not a mapping
        # (for example a JSON list, string or null).
        raise ProtocolException("Response not formatted correctly") from e
|
||||
|
||||
def embeddings(self, text):
|
||||
|
||||
# The input consists of a text block
|
||||
|
|
|
|||
|
|
@ -38,8 +38,12 @@ class DocumentEmbeddingsClient(BaseClient):
|
|||
output_schema=DocumentEmbeddingsResponse,
|
||||
)
|
||||
|
||||
def request(self, vectors, limit=10, timeout=300):
|
||||
def request(
        self, vectors, user="trustgraph", collection="default",
        limit=10, timeout=300
):
    """Run a document-embeddings query and return the matching documents.

    Forwards every argument by keyword to ``self.call`` and returns the
    ``documents`` attribute of whatever ``self.call`` gives back.

    Args:
        vectors: Query vectors, passed through unchanged.
        user: User identifier placed in the request.
        collection: Collection identifier placed in the request.
        limit: Passed through as ``limit`` (presumably the maximum number
            of results -- verify against the query service).
        timeout: Passed through to ``self.call`` (units not visible here).
    """
    reply = self.call(
        user=user, collection=collection,
        vectors=vectors, limit=limit, timeout=timeout
    )
    return reply.documents
|
||||
|
||||
|
|
|
|||
|
|
@ -35,11 +35,28 @@ chunk_ingest_queue = topic('chunk-load')
|
|||
|
||||
############################################################################
|
||||
|
||||
# Document embeddings are embeddings associated with a chunk
|
||||
|
||||
class ChunkEmbeddings(Record):
    # Schema record pairing one chunk with its embedding vectors.
    # Field order is kept as declared.

    # The chunk itself, carried as bytes
    chunk = Bytes()

    # An array of vectors, each vector being an array of doubles
    vectors = Array(Array(Double()))
|
||||
|
||||
# This is a 'batching' mechanism for the above data
|
||||
class DocumentEmbeddings(Record):
    # Schema record that batches several ChunkEmbeddings entries under
    # one Metadata value. Field order is kept as declared.

    # Metadata record shared by every chunk in the batch
    metadata = Metadata()

    # The batched chunk/vector entries
    chunks = Array(ChunkEmbeddings())
|
||||
|
||||
# Topic name built by topic() for 'document-embeddings-store'.
# NOTE(review): presumably the queue on which DocumentEmbeddings records
# are sent to the embeddings store -- confirm against the producers/consumers.
document_embeddings_store_queue = topic('document-embeddings-store')
|
||||
|
||||
############################################################################
|
||||
|
||||
# Doc embeddings query
|
||||
|
||||
class DocumentEmbeddingsRequest(Record):
    # Schema record for a document-embeddings query.
    # Field order is kept as declared.

    # Query vectors: an array of vectors, each an array of doubles
    vectors = Array(Array(Double()))

    # Integer limit (presumably the maximum number of results -- verify)
    limit = Integer()

    # User identifier for the query
    user = String()

    # Collection identifier for the query
    collection = String()
|
||||
|
||||
class DocumentEmbeddingsResponse(Record):
|
||||
error = Error()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue