mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Fix doc embeddings invocation (#672)
* Fix doc embeddings invocation
* Tidy query embeddings invocation
This commit is contained in:
parent
f2ae0e8623
commit
b2ef7bbb8c
4 changed files with 27 additions and 9 deletions
|
|
@ -599,6 +599,7 @@ class FlowInstance:
|
||||||
collection="scientists",
|
collection="scientists",
|
||||||
limit=5
|
limit=5
|
||||||
)
|
)
|
||||||
|
# results contains {"entities": [{"entity": {...}, "score": 0.95}, ...]}
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@ -633,7 +634,7 @@ class FlowInstance:
|
||||||
limit: Maximum number of results (default: 10)
|
limit: Maximum number of results (default: 10)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: Query results with chunk_ids of matching document chunks
|
dict: Query results with chunks containing chunk_id and score
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
```python
|
```python
|
||||||
|
|
@ -644,7 +645,7 @@ class FlowInstance:
|
||||||
collection="research-papers",
|
collection="research-papers",
|
||||||
limit=5
|
limit=5
|
||||||
)
|
)
|
||||||
# results contains {"chunk_ids": ["doc1/p0/c0", "doc2/p1/c3", ...]}
|
# results contains {"chunks": [{"chunk_id": "doc1/p0/c0", "score": 0.95}, ...]}
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -695,7 +695,7 @@ class SocketFlowInstance:
|
||||||
collection="research-papers",
|
collection="research-papers",
|
||||||
limit=5
|
limit=5
|
||||||
)
|
)
|
||||||
# results contains {"chunk_ids": ["doc1/p0/c0", ...]}
|
# results contains {"chunks": [{"chunk_id": "...", "score": 0.95}, ...]}
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
# First convert text to embedding vector
|
# First convert text to embedding vector
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,14 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
|
||||||
limit=limit
|
limit=limit
|
||||||
)
|
)
|
||||||
|
|
||||||
chunk_ids = result.get("chunk_ids", [])
|
chunks = result.get("chunks", [])
|
||||||
if not chunk_ids:
|
if not chunks:
|
||||||
print("No matching chunks found.")
|
print("No matching chunks found.")
|
||||||
else:
|
else:
|
||||||
for i, chunk_id in enumerate(chunk_ids, 1):
|
for i, chunk in enumerate(chunks, 1):
|
||||||
print(f"{i}. {chunk_id}")
|
chunk_id = chunk.get("chunk_id", "")
|
||||||
|
score = chunk.get("score", 0.0)
|
||||||
|
print(f"{i}. {chunk_id} (score: {score:.4f})")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Clean up socket connection
|
# Clean up socket connection
|
||||||
|
|
|
||||||
|
|
@ -27,8 +27,23 @@ def query(url, flow_id, query_text, user, collection, limit, token=None):
|
||||||
)
|
)
|
||||||
|
|
||||||
entities = result.get("entities", [])
|
entities = result.get("entities", [])
|
||||||
for entity in entities:
|
if not entities:
|
||||||
print(entity)
|
print("No matching entities found.")
|
||||||
|
else:
|
||||||
|
for i, match in enumerate(entities, 1):
|
||||||
|
entity = match.get("entity", {})
|
||||||
|
score = match.get("score", 0.0)
|
||||||
|
# Format entity based on type (wire format uses compact keys)
|
||||||
|
term_type = entity.get("t", "")
|
||||||
|
if term_type == "i": # IRI
|
||||||
|
entity_str = entity.get("i", "")
|
||||||
|
elif term_type == "l": # Literal
|
||||||
|
entity_str = f'"{entity.get("v", "")}"'
|
||||||
|
elif term_type == "b": # Blank node
|
||||||
|
entity_str = f'_:{entity.get("d", "")}'
|
||||||
|
else:
|
||||||
|
entity_str = str(entity)
|
||||||
|
print(f"{i}. {entity_str} (score: {score:.4f})")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Clean up socket connection
|
# Clean up socket connection
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue