Structured data 2 (#645)

* Structured data refactor: multi-index tables; removes the need for manual modifications to the Cassandra tables

* Tech spec updated to track implementation
This commit is contained in:
cybermaggedon 2026-02-23 15:56:29 +00:00 committed by GitHub
parent 5ffad92345
commit 1809c1f56d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
87 changed files with 5233 additions and 3235 deletions

View file

@ -1,5 +1,5 @@
"""
Uses the ObjectsQuery service to execute GraphQL queries against structured data
Uses the RowsQuery service to execute GraphQL queries against structured data
"""
import argparse
@ -81,7 +81,7 @@ def format_table_data(rows, table_name, output_format):
else:
return json.dumps({table_name: rows}, indent=2)
def objects_query(
def rows_query(
url, flow_id, query, user, collection, variables, operation_name, output_format='table'
):
@ -96,7 +96,7 @@ def objects_query(
print(f"Error parsing variables JSON: {e}", file=sys.stderr)
sys.exit(1)
resp = api.objects_query(
resp = api.rows_query(
query=query,
user=user,
collection=collection,
@ -126,7 +126,7 @@ def objects_query(
def main():
parser = argparse.ArgumentParser(
prog='tg-invoke-objects-query',
prog='tg-invoke-rows-query',
description=__doc__,
)
@ -181,7 +181,7 @@ def main():
try:
objects_query(
rows_query(
url=args.url,
flow_id=args.flow_id,
query=args.query,

View file

@ -573,19 +573,19 @@ def _process_data_pipeline(input_file, descriptor_file, user, collection, sample
return output_records, descriptor
def _send_to_trustgraph(objects, api_url, flow, batch_size=1000, token=None):
def _send_to_trustgraph(rows, api_url, flow, batch_size=1000, token=None):
"""Send ExtractedObject records to TrustGraph using Python API"""
from trustgraph.api import Api
try:
total_records = len(objects)
total_records = len(rows)
logger.info(f"Importing {total_records} records to TrustGraph...")
# Use Python API bulk import
api = Api(api_url, token=token)
bulk = api.bulk()
bulk.import_objects(flow=flow, objects=iter(objects))
bulk.import_rows(flow=flow, rows=iter(rows))
logger.info(f"Successfully imported {total_records} records to TrustGraph")