Merge 2.0 to master (#651)

This commit is contained in:
cybermaggedon 2026-02-28 11:03:14 +00:00 committed by GitHub
parent 3666ece2c5
commit b9d7bf9a8b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
212 changed files with 13940 additions and 6180 deletions

View file

@ -10,7 +10,7 @@ description = "TrustGraph provides a means to run a pipeline of flexible AI proc
readme = "README.md"
requires-python = ">=3.8"
dependencies = [
"trustgraph-base>=1.8,<1.9",
"trustgraph-base>=2.0,<2.1",
"requests",
"pulsar-client",
"aiohttp",
@ -43,9 +43,13 @@ tg-invoke-agent = "trustgraph.cli.invoke_agent:main"
tg-invoke-document-rag = "trustgraph.cli.invoke_document_rag:main"
tg-invoke-graph-rag = "trustgraph.cli.invoke_graph_rag:main"
tg-invoke-llm = "trustgraph.cli.invoke_llm:main"
tg-invoke-embeddings = "trustgraph.cli.invoke_embeddings:main"
tg-invoke-graph-embeddings = "trustgraph.cli.invoke_graph_embeddings:main"
tg-invoke-document-embeddings = "trustgraph.cli.invoke_document_embeddings:main"
tg-invoke-mcp-tool = "trustgraph.cli.invoke_mcp_tool:main"
tg-invoke-nlp-query = "trustgraph.cli.invoke_nlp_query:main"
tg-invoke-objects-query = "trustgraph.cli.invoke_objects_query:main"
tg-invoke-rows-query = "trustgraph.cli.invoke_rows_query:main"
tg-invoke-row-embeddings = "trustgraph.cli.invoke_row_embeddings:main"
tg-invoke-prompt = "trustgraph.cli.invoke_prompt:main"
tg-invoke-structured-query = "trustgraph.cli.invoke_structured_query:main"
tg-load-doc-embeds = "trustgraph.cli.load_doc_embeds:main"

View file

@ -0,0 +1,121 @@
"""
Queries document chunks by text similarity using vector embeddings.
Returns a list of matching document chunks, truncated to the specified length.
"""
import argparse
import os
from trustgraph.api import Api
# Endpoint and optional auth token for the TrustGraph API, overridable
# via the TRUSTGRAPH_URL / TRUSTGRAPH_TOKEN environment variables.
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
def truncate_chunk(chunk, max_length):
    """Return *chunk* unchanged if it fits within max_length characters,
    otherwise cut it at max_length and append an ellipsis."""
    if len(chunk) > max_length:
        return chunk[:max_length] + "..."
    return chunk
def query(url, flow_id, query_text, user, collection, limit, max_chunk_length, token=None):
    """Run a document-embeddings similarity query over the given flow and
    print each matching chunk, numbered and truncated to max_chunk_length."""
    client = Api(url=url, token=token)
    sock = client.socket()
    flow = sock.flow(flow_id)
    try:
        response = flow.document_embeddings_query(
            text=query_text,
            user=user,
            collection=collection,
            limit=limit,
        )
        for number, chunk in enumerate(response.get("chunks", []), start=1):
            print(f"{number}. {truncate_chunk(chunk, max_chunk_length)}")
    finally:
        # Always release the socket, even when the query raises.
        sock.close()
def _build_parser():
    """Assemble the argument parser for tg-invoke-document-embeddings."""
    parser = argparse.ArgumentParser(
        prog='tg-invoke-document-embeddings',
        description=__doc__,
    )
    parser.add_argument(
        '-u', '--url',
        default=default_url,
        help=f'API URL (default: {default_url})',
    )
    parser.add_argument(
        '-t', '--token',
        default=default_token,
        help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
    )
    parser.add_argument(
        '-f', '--flow-id',
        default="default",
        help='Flow ID (default: default)'
    )
    parser.add_argument(
        '-U', '--user',
        default="trustgraph",
        help='User/keyspace (default: trustgraph)',
    )
    parser.add_argument(
        '-c', '--collection',
        default="default",
        help='Collection (default: default)',
    )
    parser.add_argument(
        '-l', '--limit',
        type=int,
        default=10,
        help='Maximum number of results (default: 10)',
    )
    parser.add_argument(
        '--max-chunk-length',
        type=int,
        default=200,
        help='Truncate chunks to N characters (default: 200)',
    )
    parser.add_argument(
        'query',
        nargs=1,
        help='Query text to search for similar document chunks',
    )
    return parser

def main():
    """Parse the command line and run the document-embeddings query."""
    args = _build_parser().parse_args()
    try:
        query(
            url=args.url,
            flow_id=args.flow_id,
            query_text=args.query[0],
            user=args.user,
            collection=args.collection,
            limit=args.limit,
            max_chunk_length=args.max_chunk_length,
            token=args.token,
        )
    except Exception as e:
        # NOTE(review): errors are printed without a traceback and the
        # process still exits 0 — confirm that is intended for scripting.
        print("Exception:", e, flush=True)

if __name__ == "__main__":
    main()

View file

@ -0,0 +1,77 @@
"""
Invokes the embeddings service to convert text to a vector embedding.
Returns the embedding vector as a list of floats.
"""
import argparse
import os
from trustgraph.api import Api
# Endpoint and optional auth token for the TrustGraph API, overridable
# via the TRUSTGRAPH_URL / TRUSTGRAPH_TOKEN environment variables.
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
def query(url, flow_id, text, token=None):
    """Convert *text* to an embedding vector via the flow's embeddings
    service and print the resulting vectors list."""
    client = Api(url=url, token=token)
    sock = client.socket()
    flow = sock.flow(flow_id)
    try:
        response = flow.embeddings(text=text)
        print(response.get("vectors", []))
    finally:
        # Always release the socket, even when the call raises.
        sock.close()
def _build_parser():
    """Assemble the argument parser for tg-invoke-embeddings."""
    parser = argparse.ArgumentParser(
        prog='tg-invoke-embeddings',
        description=__doc__,
    )
    parser.add_argument(
        '-u', '--url',
        default=default_url,
        help=f'API URL (default: {default_url})',
    )
    parser.add_argument(
        '-t', '--token',
        default=default_token,
        help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
    )
    parser.add_argument(
        '-f', '--flow-id',
        default="default",
        help='Flow ID (default: default)'
    )
    parser.add_argument(
        'text',
        nargs=1,
        help='Text to convert to embedding vector',
    )
    return parser

def main():
    """Parse the command line and invoke the embeddings service."""
    args = _build_parser().parse_args()
    try:
        query(
            url=args.url,
            flow_id=args.flow_id,
            text=args.text[0],
            token=args.token,
        )
    except Exception as e:
        # NOTE(review): errors are printed without a traceback and the
        # process still exits 0 — confirm that is intended for scripting.
        print("Exception:", e, flush=True)

if __name__ == "__main__":
    main()

View file

@ -0,0 +1,106 @@
"""
Queries graph entities by text similarity using vector embeddings.
Returns a list of matching graph entities.
"""
import argparse
import os
from trustgraph.api import Api
# Endpoint and optional auth token for the TrustGraph API, overridable
# via the TRUSTGRAPH_URL / TRUSTGRAPH_TOKEN environment variables.
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
def query(url, flow_id, query_text, user, collection, limit, token=None):
    """Run a graph-embeddings similarity query over the given flow and
    print each matching entity, one per line."""
    client = Api(url=url, token=token)
    sock = client.socket()
    flow = sock.flow(flow_id)
    try:
        response = flow.graph_embeddings_query(
            text=query_text,
            user=user,
            collection=collection,
            limit=limit,
        )
        for entity in response.get("entities", []):
            print(entity)
    finally:
        # Always release the socket, even when the query raises.
        sock.close()
def _build_parser():
    """Assemble the argument parser for tg-invoke-graph-embeddings."""
    parser = argparse.ArgumentParser(
        prog='tg-invoke-graph-embeddings',
        description=__doc__,
    )
    parser.add_argument(
        '-u', '--url',
        default=default_url,
        help=f'API URL (default: {default_url})',
    )
    parser.add_argument(
        '-t', '--token',
        default=default_token,
        help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
    )
    parser.add_argument(
        '-f', '--flow-id',
        default="default",
        help='Flow ID (default: default)'
    )
    parser.add_argument(
        '-U', '--user',
        default="trustgraph",
        help='User/keyspace (default: trustgraph)',
    )
    parser.add_argument(
        '-c', '--collection',
        default="default",
        help='Collection (default: default)',
    )
    parser.add_argument(
        '-l', '--limit',
        type=int,
        default=10,
        help='Maximum number of results (default: 10)',
    )
    parser.add_argument(
        'query',
        nargs=1,
        help='Query text to search for similar graph entities',
    )
    return parser

def main():
    """Parse the command line and run the graph-embeddings query."""
    args = _build_parser().parse_args()
    try:
        query(
            url=args.url,
            flow_id=args.flow_id,
            query_text=args.query[0],
            user=args.user,
            collection=args.collection,
            limit=args.limit,
            token=args.token,
        )
    except Exception as e:
        # NOTE(review): errors are printed without a traceback and the
        # process still exits 0 — confirm that is intended for scripting.
        print("Exception:", e, flush=True)

if __name__ == "__main__":
    main()

View file

@ -0,0 +1,126 @@
"""
Queries row data by text similarity using vector embeddings on indexed fields.
Returns matching rows with their index values and similarity scores.
"""
import argparse
import os
from trustgraph.api import Api
# Endpoint and optional auth token for the TrustGraph API, overridable
# via the TRUSTGRAPH_URL / TRUSTGRAPH_TOKEN environment variables.
default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
def query(url, flow_id, query_text, schema_name, user, collection, index_name, limit, token=None):
    """Run a row-embeddings similarity search over the given flow and
    print the index name, index values, text, and score of each match."""
    client = Api(url=url, token=token)
    sock = client.socket()
    flow = sock.flow(flow_id)
    try:
        response = flow.row_embeddings_query(
            text=query_text,
            schema_name=schema_name,
            user=user,
            collection=collection,
            index_name=index_name,
            limit=limit,
        )
        for hit in response.get("matches", []):
            print(f"Index: {hit['index_name']}")
            print(f" Values: {hit['index_value']}")
            print(f" Text: {hit['text']}")
            print(f" Score: {hit['score']:.4f}")
            print()
    finally:
        # Always release the socket, even when the query raises.
        sock.close()
def _build_parser():
    """Assemble the argument parser for tg-invoke-row-embeddings."""
    parser = argparse.ArgumentParser(
        prog='tg-invoke-row-embeddings',
        description=__doc__,
    )
    parser.add_argument(
        '-u', '--url',
        default=default_url,
        help=f'API URL (default: {default_url})',
    )
    parser.add_argument(
        '-t', '--token',
        default=default_token,
        help='Authentication token (default: $TRUSTGRAPH_TOKEN)',
    )
    parser.add_argument(
        '-f', '--flow-id',
        default="default",
        help='Flow ID (default: default)'
    )
    parser.add_argument(
        '-U', '--user',
        default="trustgraph",
        help='User/keyspace (default: trustgraph)',
    )
    parser.add_argument(
        '-c', '--collection',
        default="default",
        help='Collection (default: default)',
    )
    parser.add_argument(
        '-s', '--schema-name',
        required=True,
        help='Schema name to search within (required)',
    )
    parser.add_argument(
        '-i', '--index-name',
        default=None,
        help='Index name to filter search (optional)',
    )
    parser.add_argument(
        '-l', '--limit',
        type=int,
        default=10,
        help='Maximum number of results (default: 10)',
    )
    parser.add_argument(
        'query',
        nargs=1,
        help='Query text to search for similar row index values',
    )
    return parser

def main():
    """Parse the command line and run the row-embeddings search."""
    args = _build_parser().parse_args()
    try:
        query(
            url=args.url,
            flow_id=args.flow_id,
            query_text=args.query[0],
            schema_name=args.schema_name,
            user=args.user,
            collection=args.collection,
            index_name=args.index_name,
            limit=args.limit,
            token=args.token,
        )
    except Exception as e:
        # NOTE(review): errors are printed without a traceback and the
        # process still exits 0 — confirm that is intended for scripting.
        print("Exception:", e, flush=True)

if __name__ == "__main__":
    main()

View file

@ -1,5 +1,5 @@
"""
Uses the ObjectsQuery service to execute GraphQL queries against structured data
Uses the RowsQuery service to execute GraphQL queries against structured data
"""
import argparse
@ -81,7 +81,7 @@ def format_table_data(rows, table_name, output_format):
else:
return json.dumps({table_name: rows}, indent=2)
def objects_query(
def rows_query(
url, flow_id, query, user, collection, variables, operation_name, output_format='table'
):
@ -96,7 +96,7 @@ def objects_query(
print(f"Error parsing variables JSON: {e}", file=sys.stderr)
sys.exit(1)
resp = api.objects_query(
resp = api.rows_query(
query=query,
user=user,
collection=collection,
@ -126,7 +126,7 @@ def objects_query(
def main():
parser = argparse.ArgumentParser(
prog='tg-invoke-objects-query',
prog='tg-invoke-rows-query',
description=__doc__,
)
@ -181,7 +181,7 @@ def main():
try:
objects_query(
rows_query(
url=args.url,
flow_id=args.flow_id,
query=args.query,

View file

@ -87,13 +87,20 @@ class KnowledgeLoader:
# Load triples from all files
print("Loading triples...")
total_triples = 0
for file in self.files:
print(f" Processing {file}...")
triples = self.load_triples_from_file(file)
count = 0
def counting_triples():
nonlocal count
for triple in self.load_triples_from_file(file):
count += 1
yield triple
bulk.import_triples(
flow=self.flow,
triples=triples,
triples=counting_triples(),
metadata={
"id": self.document_id,
"metadata": [],
@ -101,25 +108,33 @@ class KnowledgeLoader:
"collection": self.collection
}
)
print(f" Loaded {count} triples")
total_triples += count
print("Triples loaded.")
print(f"Triples loaded. Total: {total_triples}")
# Load entity contexts from all files
print("Loading entity contexts...")
total_contexts = 0
for file in self.files:
print(f" Processing {file}...")
count = 0
# Convert tuples to the format expected by import_entity_contexts
# Entity must be in Term format: {"t": "i", "i": uri} for IRI
def entity_context_generator():
nonlocal count
for entity, context in self.load_entity_contexts_from_file(file):
count += 1
# Entities from RDF are URIs, use IRI term format
yield {
"entity": {"v": entity, "e": True},
"entity": {"t": "i", "i": entity},
"context": context
}
bulk.import_entity_contexts(
flow=self.flow,
entities=entity_context_generator(),
contexts=entity_context_generator(),
metadata={
"id": self.document_id,
"metadata": [],
@ -127,8 +142,10 @@ class KnowledgeLoader:
"collection": self.collection
}
)
print(f" Loaded {count} entity contexts")
total_contexts += count
print("Entity contexts loaded.")
print(f"Entity contexts loaded. Total: {total_contexts}")
except Exception as e:
print(f"Error: {e}", flush=True)

View file

@ -573,19 +573,19 @@ def _process_data_pipeline(input_file, descriptor_file, user, collection, sample
return output_records, descriptor
def _send_to_trustgraph(objects, api_url, flow, batch_size=1000, token=None):
def _send_to_trustgraph(rows, api_url, flow, batch_size=1000, token=None):
"""Send ExtractedObject records to TrustGraph using Python API"""
from trustgraph.api import Api
try:
total_records = len(objects)
total_records = len(rows)
logger.info(f"Importing {total_records} records to TrustGraph...")
# Use Python API bulk import
api = Api(api_url, token=token)
bulk = api.bulk()
bulk.import_objects(flow=flow, objects=iter(objects))
bulk.import_rows(flow=flow, rows=iter(rows))
logger.info(f"Successfully imported {total_records} records to TrustGraph")

View file

@ -2,8 +2,9 @@
Configures and registers tools in the TrustGraph system.
This script allows you to define agent tools with various types including:
- knowledge-query: Query knowledge bases
- knowledge-query: Query knowledge bases
- structured-query: Query structured data using natural language
- row-embeddings-query: Semantic search on structured data indexes
- text-completion: Text generation
- mcp-tool: Reference to MCP (Model Context Protocol) tools
- prompt: Prompt template execution
@ -64,6 +65,9 @@ def set_tool(
mcp_tool : str,
collection : str,
template : str,
schema_name : str,
index_name : str,
limit : int,
arguments : List[Argument],
group : List[str],
state : str,
@ -89,6 +93,12 @@ def set_tool(
if template: object["template"] = template
if schema_name: object["schema-name"] = schema_name
if index_name: object["index-name"] = index_name
if limit: object["limit"] = limit
if arguments:
object["arguments"] = [
{
@ -120,30 +130,37 @@ def main():
description=__doc__,
epilog=textwrap.dedent('''
Valid tool types:
knowledge-query - Query knowledge bases (fixed args)
structured-query - Query structured data using natural language (fixed args)
text-completion - Text completion/generation (fixed args)
mcp-tool - Model Control Protocol tool (configurable args)
prompt - Prompt template query (configurable args)
Note: Tools marked "(fixed args)" have predefined arguments and don't need
knowledge-query - Query knowledge bases (fixed args)
structured-query - Query structured data using natural language (fixed args)
row-embeddings-query - Semantic search on structured data indexes (fixed args)
text-completion - Text completion/generation (fixed args)
mcp-tool - Model Control Protocol tool (configurable args)
prompt - Prompt template query (configurable args)
Note: Tools marked "(fixed args)" have predefined arguments and don't need
--argument specified. Tools marked "(configurable args)" require --argument.
Valid argument types:
string - String/text parameter
string - String/text parameter
number - Numeric parameter
Examples:
%(prog)s --id weather_tool --name get_weather \\
--type knowledge-query \\
--description "Get weather information for a location" \\
--collection weather_data
%(prog)s --id data_query_tool --name query_data \\
--type structured-query \\
--description "Query structured data using natural language" \\
--collection sales_data
%(prog)s --id customer_search --name find_customer \\
--type row-embeddings-query \\
--description "Find customers by name using semantic search" \\
--schema-name customers --collection sales \\
--index-name full_name --limit 20
%(prog)s --id calc_tool --name calculate --type mcp-tool \\
--description "Perform mathematical calculations" \\
--mcp-tool calculator \\
@ -181,7 +198,7 @@ def main():
parser.add_argument(
'--type',
help=f'Tool type, one of: knowledge-query, structured-query, text-completion, mcp-tool, prompt',
help=f'Tool type, one of: knowledge-query, structured-query, row-embeddings-query, text-completion, mcp-tool, prompt',
)
parser.add_argument(
@ -191,7 +208,23 @@ def main():
parser.add_argument(
'--collection',
help=f'For knowledge-query and structured-query types: collection to query',
help=f'For knowledge-query, structured-query, and row-embeddings-query types: collection to query',
)
parser.add_argument(
'--schema-name',
help=f'For row-embeddings-query type: schema name to search within (required)',
)
parser.add_argument(
'--index-name',
help=f'For row-embeddings-query type: specific index to filter search (optional)',
)
parser.add_argument(
'--limit',
type=int,
help=f'For row-embeddings-query type: maximum results to return (default: 10)',
)
parser.add_argument(
@ -227,7 +260,8 @@ def main():
try:
valid_types = [
"knowledge-query", "structured-query", "text-completion", "mcp-tool", "prompt"
"knowledge-query", "structured-query", "row-embeddings-query",
"text-completion", "mcp-tool", "prompt"
]
if args.id is None:
@ -261,6 +295,9 @@ def main():
mcp_tool=mcp_tool,
collection=args.collection,
template=args.template,
schema_name=args.schema_name,
index_name=args.index_name,
limit=args.limit,
arguments=arguments,
group=args.group,
state=args.state,

View file

@ -4,8 +4,9 @@ Displays the current agent tool configurations
Shows all configured tools including their types:
- knowledge-query: Tools that query knowledge bases
- structured-query: Tools that query structured data using natural language
- row-embeddings-query: Tools for semantic search on structured data indexes
- text-completion: Tools for text generation
- mcp-tool: References to MCP (Model Context Protocol) tools
- mcp-tool: References to MCP (Model Context Protocol) tools
- prompt: Tools that execute prompt templates
"""
@ -41,11 +42,19 @@ def show_config(url, token=None):
if tp == "mcp-tool":
table.append(("mcp-tool", data["mcp-tool"]))
if tp == "knowledge-query" or tp == "structured-query":
if tp in ("knowledge-query", "structured-query", "row-embeddings-query"):
if "collection" in data:
table.append(("collection", data["collection"]))
if tp == "row-embeddings-query":
if "schema-name" in data:
table.append(("schema-name", data["schema-name"]))
if "index-name" in data:
table.append(("index-name", data["index-name"]))
if "limit" in data:
table.append(("limit", data["limit"]))
if tp == "prompt":
table.append(("template", data["template"]))
for n, arg in enumerate(data["arguments"]):