Additional user fixes and test fixes

2026-06-13 08:45:13 +02:00 · 2026-04-21 10:53:15 +01:00 · 2026-04-21 10:53:15 +01:00 · 7f0f79dd15
commit 7f0f79dd15
parent db05427d0e
62 changed files with 1078 additions and 1315 deletions
--- a/trustgraph-cli/trustgraph/cli/load_sample_documents.py
+++ b/trustgraph-cli/trustgraph/cli/load_sample_documents.py
@@ -12,7 +12,6 @@ from trustgraph.api import Api
 from trustgraph.api.types import hash, Uri, Literal, Triple

 default_url = os.getenv("TRUSTGRAPH_URL", 'http://localhost:8088/')
-default_user = 'trustgraph'
 default_token = os.getenv("TRUSTGRAPH_TOKEN", None)
 default_workspace = os.getenv("TRUSTGRAPH_WORKSPACE", "default")

--- a/trustgraph-cli/trustgraph/cli/load_structured_data.py
+++ b/trustgraph-cli/trustgraph/cli/load_structured_data.py
@@ -40,7 +40,6 @@ def load_structured_data(
    sample_chars: int = 500,
    schema_name: str = None,
    flow: str = 'default',
-    user: str = 'trustgraph',
    collection: str = 'default',
    dry_run: bool = False,
    verbose: bool = False,
@@ -64,7 +63,6 @@ def load_structured_data(
        sample_chars: Maximum characters to read for sampling
        schema_name: Target schema name for generation
        flow: TrustGraph flow name to use for prompts
-        user: User name for metadata (default: trustgraph)
        collection: Collection name for metadata (default: default)
        dry_run: If True, validate but don't import data
        verbose: Enable verbose logging
@@ -112,7 +110,7 @@ def load_structured_data(
        
        try:
            # Use shared pipeline for preview (small sample)
-            preview_objects, _ = _process_data_pipeline(input_file, temp_descriptor.name, user, collection, sample_size=5)
+            preview_objects, _ = _process_data_pipeline(input_file, temp_descriptor.name, collection, sample_size=5)
            
            # Show preview
            print("📊 Data Preview (first few records):")
@@ -133,7 +131,7 @@ def load_structured_data(
                print("🚀 Importing data to TrustGraph...")
                
                # Use shared pipeline for full processing (no sample limit)
-                output_objects, descriptor = _process_data_pipeline(input_file, temp_descriptor.name, user, collection)
+                output_objects, descriptor = _process_data_pipeline(input_file, temp_descriptor.name, collection)
                
                # Get batch size from descriptor
                batch_size = descriptor.get('output', {}).get('options', {}).get('batch_size', 1000)
@@ -244,7 +242,7 @@ def load_structured_data(
        logger.info(f"Parsing {input_file} with descriptor {descriptor_file}...")
        
        # Use shared pipeline
-        output_records, descriptor = _process_data_pipeline(input_file, descriptor_file, user, collection, sample_size)
+        output_records, descriptor = _process_data_pipeline(input_file, descriptor_file, collection, sample_size)
        
        # Output results
        if output_file:
@@ -288,7 +286,7 @@ def load_structured_data(
        logger.info(f"Loading {input_file} to TrustGraph using descriptor {descriptor_file}...")
        
        # Use shared pipeline (no sample_size limit for full load)
-        output_records, descriptor = _process_data_pipeline(input_file, descriptor_file, user, collection)
+        output_records, descriptor = _process_data_pipeline(input_file, descriptor_file, collection)
        
        # Get batch size from descriptor or use default
        batch_size = descriptor.get('output', {}).get('options', {}).get('batch_size', 1000)
@@ -529,18 +527,17 @@ def _apply_transformations(records, mappings):
    return processed_records


-def _format_extracted_objects(processed_records, descriptor, user, collection):
+def _format_extracted_objects(processed_records, descriptor, collection):
    """Convert to TrustGraph ExtractedObject format"""
    output_records = []
    schema_name = descriptor.get('output', {}).get('schema_name', 'default')
    confidence = descriptor.get('output', {}).get('options', {}).get('confidence', 0.9)
-    
+
    for record in processed_records:
        output_record = {
            "metadata": {
                "id": f"parsed-{len(output_records)+1}",
                "metadata": [],  # Empty metadata triples
-                "user": user,
                "collection": collection
            },
            "schema_name": schema_name,
@@ -553,7 +550,7 @@ def _format_extracted_objects(processed_records, descriptor, user, collection):
    return output_records


-def _process_data_pipeline(input_file, descriptor_file, user, collection, sample_size=None):
+def _process_data_pipeline(input_file, descriptor_file, collection, sample_size=None):
    """Shared pipeline: load descriptor → read → parse → transform → format"""
    # Load descriptor configuration
    descriptor = _load_descriptor(descriptor_file)
@@ -570,7 +567,7 @@ def _process_data_pipeline(input_file, descriptor_file, user, collection, sample
    processed_records = _apply_transformations(parsed_records, mappings)
    
    # Format output for TrustGraph ExtractedObject structure
-    output_records = _format_extracted_objects(processed_records, descriptor, user, collection)
+    output_records = _format_extracted_objects(processed_records, descriptor, collection)
    
    return output_records, descriptor

@@ -1048,7 +1045,6 @@ For more information on the descriptor format, see:
            sample_chars=args.sample_chars,
            schema_name=args.schema_name,
            flow=args.flow,
-            user=args.user,
            collection=args.collection,
            dry_run=args.dry_run,
            verbose=args.verbose,