Fix doc embeddings invocation (#672)

* Fix doc embeddings invocation

* Tidy query embeddings invocation
This commit is contained in:
cybermaggedon 2026-03-09 11:07:32 +00:00 committed by GitHub
parent f2ae0e8623
commit b2ef7bbb8c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 27 additions and 9 deletions

View file

@@ -599,6 +599,7 @@ class FlowInstance:
collection="scientists",
limit=5
)
# results contains {"entities": [{"entity": {...}, "score": 0.95}, ...]}
```
"""
@@ -633,7 +634,7 @@ class FlowInstance:
limit: Maximum number of results (default: 10)
Returns:
dict: Query results with chunk_ids of matching document chunks
dict: Query results with chunks containing chunk_id and score
Example:
```python
@@ -644,7 +645,7 @@ class FlowInstance:
collection="research-papers",
limit=5
)
# results contains {"chunk_ids": ["doc1/p0/c0", "doc2/p1/c3", ...]}
# results contains {"chunks": [{"chunk_id": "doc1/p0/c0", "score": 0.95}, ...]}
```
"""

View file

@@ -695,7 +695,7 @@ class SocketFlowInstance:
collection="research-papers",
limit=5
)
# results contains {"chunk_ids": ["doc1/p0/c0", ...]}
# results contains {"chunks": [{"chunk_id": "...", "score": 0.95}, ...]}
```
"""
# First convert text to embedding vector

View file

@@ -26,12 +26,14 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
limit=limit
)
chunk_ids = result.get("chunk_ids", [])
if not chunk_ids:
chunks = result.get("chunks", [])
if not chunks:
print("No matching chunks found.")
else:
for i, chunk_id in enumerate(chunk_ids, 1):
print(f"{i}. {chunk_id}")
for i, chunk in enumerate(chunks, 1):
chunk_id = chunk.get("chunk_id", "")
score = chunk.get("score", 0.0)
print(f"{i}. {chunk_id} (score: {score:.4f})")
finally:
# Clean up socket connection

View file

@@ -27,8 +27,23 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
)
entities = result.get("entities", [])
for entity in entities:
print(entity)
if not entities:
print("No matching entities found.")
else:
for i, match in enumerate(entities, 1):
entity = match.get("entity", {})
score = match.get("score", 0.0)
# Format entity based on type (wire format uses compact keys)
term_type = entity.get("t", "")
if term_type == "i": # IRI
entity_str = entity.get("i", "")
elif term_type == "l": # Literal
entity_str = f'"{entity.get("v", "")}"'
elif term_type == "b": # Blank node
entity_str = f'_:{entity.get("d", "")}'
else:
entity_str = str(entity)
print(f"{i}. {entity_str} (score: {score:.4f})")
finally:
# Clean up socket connection