Embeddings API scores (#671)

- Put scores in all responses
- Remove unused 'middle' vector layer: a vector of texts now maps to a vector of embeddings (one embedding vector per text) instead of a vector of vector sets
This commit is contained in:
cybermaggedon 2026-03-09 10:53:44 +00:00 committed by GitHub
parent 4fa7cc7d7c
commit f2ae0e8623
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
65 changed files with 1339 additions and 1292 deletions

View file

@ -66,13 +66,13 @@ class Processor(FlowProcessor):
)
)
# vectors[0] is the vector set for the first (only) text
vectors = resp.vectors[0] if resp.vectors else []
# vectors[0] is the vector for the first (only) text
vector = resp.vectors[0] if resp.vectors else []
embeds = [
ChunkEmbeddings(
chunk_id=v.document_id,
vectors=vectors,
vector=vector,
)
]

View file

@ -59,11 +59,8 @@ class Processor(EmbeddingsService):
# FastEmbed processes the full batch efficiently
vecs = list(self.embeddings.embed(texts))
# Return list of vector sets, one per input text
return [
[v.tolist()]
for v in vecs
]
# Return list of vectors, one per input text
return [v.tolist() for v in vecs]
@staticmethod
def add_args(parser):

View file

@ -72,10 +72,10 @@ class Processor(FlowProcessor):
entities = [
EntityEmbeddings(
entity=entity.entity,
vectors=vectors, # Vector set for this entity
vector=vector,
chunk_id=entity.chunk_id, # Provenance: source chunk
)
for entity, vectors in zip(v.entities, all_vectors)
for entity, vector in zip(v.entities, all_vectors)
]
# Send in batches to avoid oversized messages

View file

@ -43,11 +43,8 @@ class Processor(EmbeddingsService):
input = texts
)
# Return list of vector sets, one per input text
return [
[embedding]
for embedding in embeds.embeddings
]
# Return list of vectors, one per input text
return list(embeds.embeddings)
@staticmethod
def add_args(parser):

View file

@ -208,7 +208,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
all_vectors = await flow("embeddings-request").embed(texts=texts)
# Pair results with metadata
for text, (index_name, index_value), vectors in zip(
for text, (index_name, index_value), vector in zip(
texts, metadata, all_vectors
):
embeddings_list.append(
@ -216,7 +216,7 @@ class Processor(CollectionConfigHandler, FlowProcessor):
index_name=index_name,
index_value=index_value,
text=text,
vectors=vectors # Vector set for this text
vector=vector
)
)