Added XML, JSON, CSV detection (#519)

* Improved XML detection, added schema selection

* Add schema select + tests

* API additions

* More tests

* Fixed tests
This commit is contained in:
cybermaggedon 2025-09-16 23:53:43 +01:00 committed by GitHub
parent 3d783f4bd4
commit 48016d8fb2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1240 additions and 54 deletions

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Double
from pulsar.schema import Record, String, Map, Double, Array
from ..core.primitives import Error
############################################################################
@ -6,7 +6,7 @@ from ..core.primitives import Error
# Structured data diagnosis services
class StructuredDataDiagnosisRequest(Record):
operation = String() # "detect-type", "generate-descriptor", or "diagnose"
operation = String() # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
sample = String() # Data sample to analyze (text content)
type = String() # Data type (csv, json, xml) - optional, required for generate-descriptor
schema_name = String() # Target schema name for descriptor generation - optional
@ -27,4 +27,7 @@ class StructuredDataDiagnosisResponse(Record):
# JSON-encoded additional metadata (e.g., field count, sample records)
metadata = Map(String())
# Optional array of matching schema IDs (for the schema-selection operation)
schema_matches = Array(String())
############################################################################