Added XML, JSON, CSV detection (#519)

* Improved XML detect, added schema selection
* Add schema select + tests
* API additions
* More tests
* Fixed tests
2026-04-26 00:46:22 +02:00 · 2025-09-16 23:53:43 +01:00 · 2025-09-16 23:53:43 +01:00 · 48016d8fb2
commit 48016d8fb2
parent 3d783f4bd4
10 changed files with 1240 additions and 54 deletions
--- a/trustgraph-base/trustgraph/api/flow.py
+++ b/trustgraph-base/trustgraph/api/flow.py
@ -492,12 +492,148 @@ class FlowInstance:
            "service/structured-query",
            input
        )
-        
+
        # Check for system-level error
        if "error" in response and response["error"]:
            error_type = response["error"].get("type", "unknown")
            error_message = response["error"].get("message", "Unknown error")
            raise ProtocolException(f"{error_type}: {error_message}")
-        
+
        return response

+    def detect_type(self, sample):
+        """
+        Ask the structured-diag service which data type a sample is.
+
+        Sends a "detect-type" operation to "service/structured-diag".
+
+        Args:
+            sample: Data sample to analyze (string content)
+
+        Returns:
+            The "detected-type" field of the service response (only that
+            field, not the whole response)
+
+        Raises:
+            ProtocolException: if the response carries a non-empty
+                "error" entry
+        """
+
+        reply = self.request(
+            "service/structured-diag",
+            {"operation": "detect-type", "sample": sample},
+        )
+
+        # A truthy "error" entry signals a system-level failure
+        if "error" in reply:
+            fault = reply["error"]
+            if fault:
+                kind = fault.get("type", "unknown")
+                detail = fault.get("message", "Unknown error")
+                raise ProtocolException(f"{kind}: {detail}")
+
+        # Indexed directly: a response without this field is not tolerated
+        return reply["detected-type"]
+
+    def generate_descriptor(self, sample, data_type, schema_name, options=None):
+        """
+        Request a descriptor mapping a data sample onto a named schema.
+
+        Sends a "generate-descriptor" operation to "service/structured-diag".
+
+        Args:
+            sample: Data sample to analyze (string content)
+            data_type: Data type (csv, json, xml); sent as "type"
+            schema_name: Target schema name; sent as "schema-name"
+            options: Optional parameters (e.g., delimiter for CSV); only
+                included in the request when truthy
+
+        Returns:
+            The "descriptor" field of the service response (only that
+            field, not the whole response)
+
+        Raises:
+            ProtocolException: if the response carries a non-empty
+                "error" entry
+        """
+
+        payload = {
+            "operation": "generate-descriptor",
+            "sample": sample,
+            "type": data_type,
+            "schema-name": schema_name,
+        }
+        if options:
+            payload["options"] = options
+
+        reply = self.request("service/structured-diag", payload)
+
+        # A truthy "error" entry signals a system-level failure
+        if "error" in reply:
+            fault = reply["error"]
+            if fault:
+                kind = fault.get("type", "unknown")
+                detail = fault.get("message", "Unknown error")
+                raise ProtocolException(f"{kind}: {detail}")
+
+        # Indexed directly: a response without this field is not tolerated
+        return reply["descriptor"]
+
+    def diagnose_data(self, sample, schema_name=None, options=None):
+        """
+        Run the combined "diagnose" operation on a data sample.
+
+        Sends a "diagnose" operation to "service/structured-diag".
+
+        Args:
+            sample: Data sample to analyze (string content)
+            schema_name: Optional target schema name; sent as
+                "schema-name" only when truthy
+            options: Optional parameters (e.g., delimiter for CSV); only
+                included in the request when truthy
+
+        Returns:
+            The complete service response, unmodified
+
+        Raises:
+            ProtocolException: if the response carries a non-empty
+                "error" entry
+        """
+
+        payload = {"operation": "diagnose", "sample": sample}
+
+        # Optional fields are left out of the request entirely when unset
+        if schema_name:
+            payload["schema-name"] = schema_name
+        if options:
+            payload["options"] = options
+
+        reply = self.request("service/structured-diag", payload)
+
+        # A truthy "error" entry signals a system-level failure
+        if "error" in reply:
+            fault = reply["error"]
+            if fault:
+                kind = fault.get("type", "unknown")
+                detail = fault.get("message", "Unknown error")
+                raise ProtocolException(f"{kind}: {detail}")
+
+        return reply
+
+    def schema_selection(self, sample, options=None):
+        """
+        Ask the structured-diag service which schemas match a data sample.
+
+        Sends a "schema-selection" operation to "service/structured-diag".
+
+        Args:
+            sample: Data sample to analyze (string content)
+            options: Optional parameters; only included in the request
+                when truthy
+
+        Returns:
+            The "schema-matches" field of the service response (only that
+            field, not the whole response)
+
+        Raises:
+            ProtocolException: if the response carries a non-empty
+                "error" entry
+        """
+
+        payload = {"operation": "schema-selection", "sample": sample}
+        if options:
+            payload["options"] = options
+
+        reply = self.request("service/structured-diag", payload)
+
+        # A truthy "error" entry signals a system-level failure
+        if "error" in reply:
+            fault = reply["error"]
+            if fault:
+                kind = fault.get("type", "unknown")
+                detail = fault.get("message", "Unknown error")
+                raise ProtocolException(f"{kind}: {detail}")
+
+        # Indexed directly: a response without this field is not tolerated
+        return reply["schema-matches"]
+