mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-03 15:01:00 +02:00
Embeddings API scores (#671)
- Put scores in all responses
- Remove unused 'middle' vector layer: vector of texts -> vector of (vector embedding)
This commit is contained in:
parent
4fa7cc7d7c
commit
f2ae0e8623
65 changed files with 1339 additions and 1292 deletions
|
|
@@ -66,13 +66,13 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
# vectors[0] is the vector set for the first (only) text
|
||||
vectors = resp.vectors[0] if resp.vectors else []
|
||||
# vectors[0] is the vector for the first (only) text
|
||||
vector = resp.vectors[0] if resp.vectors else []
|
||||
|
||||
embeds = [
|
||||
ChunkEmbeddings(
|
||||
chunk_id=v.document_id,
|
||||
vectors=vectors,
|
||||
vector=vector,
|
||||
)
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@@ -59,11 +59,8 @@ class Processor(EmbeddingsService):
|
|||
# FastEmbed processes the full batch efficiently
|
||||
vecs = list(self.embeddings.embed(texts))
|
||||
|
||||
# Return list of vector sets, one per input text
|
||||
return [
|
||||
[v.tolist()]
|
||||
for v in vecs
|
||||
]
|
||||
# Return list of vectors, one per input text
|
||||
return [v.tolist() for v in vecs]
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@@ -72,10 +72,10 @@ class Processor(FlowProcessor):
|
|||
entities = [
|
||||
EntityEmbeddings(
|
||||
entity=entity.entity,
|
||||
vectors=vectors, # Vector set for this entity
|
||||
vector=vector,
|
||||
chunk_id=entity.chunk_id, # Provenance: source chunk
|
||||
)
|
||||
for entity, vectors in zip(v.entities, all_vectors)
|
||||
for entity, vector in zip(v.entities, all_vectors)
|
||||
]
|
||||
|
||||
# Send in batches to avoid oversized messages
|
||||
|
|
|
|||
|
|
@@ -43,11 +43,8 @@ class Processor(EmbeddingsService):
|
|||
input = texts
|
||||
)
|
||||
|
||||
# Return list of vector sets, one per input text
|
||||
return [
|
||||
[embedding]
|
||||
for embedding in embeds.embeddings
|
||||
]
|
||||
# Return list of vectors, one per input text
|
||||
return list(embeds.embeddings)
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@@ -208,7 +208,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
|
|||
all_vectors = await flow("embeddings-request").embed(texts=texts)
|
||||
|
||||
# Pair results with metadata
|
||||
for text, (index_name, index_value), vectors in zip(
|
||||
for text, (index_name, index_value), vector in zip(
|
||||
texts, metadata, all_vectors
|
||||
):
|
||||
embeddings_list.append(
|
||||
|
|
@@ -216,7 +216,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
|
|||
index_name=index_name,
|
||||
index_value=index_value,
|
||||
text=text,
|
||||
vectors=vectors # Vector set for this text
|
||||
vector=vector
|
||||
)
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue