mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 00:46:22 +02:00
Structure data diagnosis service (#518)
* Import flow tech spec * Structured diag service * Plumbed into API gateway * Type detector * Diag service * Added entry point
This commit is contained in:
parent
d73af56690
commit
3d783f4bd4
13 changed files with 1201 additions and 3 deletions
|
|
@ -24,6 +24,7 @@ from .translators.embeddings_query import (
|
|||
from .translators.objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
|
||||
from .translators.nlp_query import QuestionToStructuredQueryRequestTranslator, QuestionToStructuredQueryResponseTranslator
|
||||
from .translators.structured_query import StructuredQueryRequestTranslator, StructuredQueryResponseTranslator
|
||||
from .translators.diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
|
||||
# Register all service translators
|
||||
TranslatorRegistry.register_service(
|
||||
|
|
@ -123,11 +124,17 @@ TranslatorRegistry.register_service(
|
|||
)
|
||||
|
||||
# Structured query service: one name, one request translator, one response
# translator. The diff rendering had repeated the first two arguments (old
# and new side of a whitespace-only change); only one copy belongs here.
TranslatorRegistry.register_service(
    "structured-query",
    StructuredQueryRequestTranslator(),
    StructuredQueryResponseTranslator()
)

# Structured data diagnosis service
TranslatorRegistry.register_service(
    "structured-diag",
    StructuredDataDiagnosisRequestTranslator(),
    StructuredDataDiagnosisResponseTranslator()
)

# Register single-direction translators for document loading
TranslatorRegistry.register_request("document", DocumentTranslator())
TranslatorRegistry.register_request("text-document", TextDocumentTranslator())
|
||||
|
|
|
|||
|
|
@ -18,3 +18,4 @@ from .embeddings_query import (
|
|||
GraphEmbeddingsRequestTranslator, GraphEmbeddingsResponseTranslator
|
||||
)
|
||||
from .objects_query import ObjectsQueryRequestTranslator, ObjectsQueryResponseTranslator
|
||||
from .diagnosis import StructuredDataDiagnosisRequestTranslator, StructuredDataDiagnosisResponseTranslator
|
||||
|
|
|
|||
|
|
@ -0,0 +1,65 @@
|
|||
from typing import Dict, Any, Tuple
|
||||
import json
|
||||
from ...schema import StructuredDataDiagnosisRequest, StructuredDataDiagnosisResponse
|
||||
from .base import MessageTranslator
|
||||
|
||||
|
||||
class StructuredDataDiagnosisRequestTranslator(MessageTranslator):
    """Maps diagnosis requests between plain dicts and
    StructuredDataDiagnosisRequest records."""

    def to_pulsar(self, data: Dict[str, Any]) -> StructuredDataDiagnosisRequest:
        """Build a request record from a dict.

        "operation" and "sample" are read by subscript, so a missing key
        raises KeyError. "type" and "schema-name" default to an empty
        string and "options" defaults to an empty dict.
        """
        operation = data["operation"]
        sample = data["sample"]

        return StructuredDataDiagnosisRequest(
            operation=operation,
            sample=sample,
            type=data.get("type", ""),
            schema_name=data.get("schema-name", ""),
            options=data.get("options", {}),
        )

    def from_pulsar(self, obj: StructuredDataDiagnosisRequest) -> Dict[str, Any]:
        """Turn a request record into a dict.

        "operation" and "sample" are always present; the remaining keys
        appear only when the corresponding record field is truthy.
        """
        result = {"operation": obj.operation, "sample": obj.sample}

        # (output key, record value) pairs for the optional fields
        optional_fields = (
            ("type", obj.type),
            ("schema-name", obj.schema_name),
            ("options", obj.options),
        )
        for key, value in optional_fields:
            if value:
                result[key] = value

        return result
|
||||
|
||||
|
||||
class StructuredDataDiagnosisResponseTranslator(MessageTranslator):
    """Maps StructuredDataDiagnosisResponse records to plain dicts."""

    def to_pulsar(self, data: Dict[str, Any]) -> StructuredDataDiagnosisResponse:
        """Not supported for responses; always raises NotImplementedError."""
        raise NotImplementedError("Response translation to Pulsar not typically needed")

    def from_pulsar(self, obj: StructuredDataDiagnosisResponse) -> Dict[str, Any]:
        """Turn a response record into a dict.

        "operation" is always present. "detected-type", "descriptor" and
        "metadata" are included only when truthy; "confidence" is included
        whenever it is not None (so 0.0 is kept).
        """
        result = {"operation": obj.operation}

        if obj.detected_type:
            result["detected-type"] = obj.detected_type

        if obj.confidence is not None:
            result["confidence"] = obj.confidence

        raw_descriptor = obj.descriptor
        if raw_descriptor:
            # The descriptor is expected to be JSON text; if it cannot be
            # decoded, pass the raw value through unchanged.
            try:
                descriptor = json.loads(raw_descriptor)
            except (json.JSONDecodeError, TypeError):
                descriptor = raw_descriptor
            result["descriptor"] = descriptor

        if obj.metadata:
            result["metadata"] = obj.metadata

        return result

    def from_response_with_completion(self, obj: StructuredDataDiagnosisResponse) -> Tuple[Dict[str, Any], bool]:
        """Return (response_dict, is_final); is_final is always True."""
        response = self.from_pulsar(obj)
        return response, True
|
||||
|
|
@ -9,4 +9,5 @@ from .library import *
|
|||
from .lookup import *
|
||||
from .nlp_query import *
|
||||
from .structured_query import *
|
||||
from .objects_query import *
|
||||
from .objects_query import *
|
||||
from .diagnosis import *
|
||||
30
trustgraph-base/trustgraph/schema/services/diagnosis.py
Normal file
30
trustgraph-base/trustgraph/schema/services/diagnosis.py
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
from pulsar.schema import Record, String, Map, Double
|
||||
from ..core.primitives import Error
|
||||
|
||||
############################################################################
|
||||
|
||||
# Structured data diagnosis services
|
||||
|
||||
class StructuredDataDiagnosisRequest(Record):
    # Request record for the structured data diagnosis service.

    # Operation to perform: "detect-type", "generate-descriptor", or "diagnose"
    operation = String()

    # Data sample to analyze (text content)
    sample = String()

    # Data type (csv, json, xml) - optional, required for generate-descriptor
    type = String()

    # Target schema name for descriptor generation - optional
    schema_name = String()

    # JSON encoded options (e.g., delimiter for CSV), held as a map with
    # string values
    options = Map(String())
|
||||
|
||||
class StructuredDataDiagnosisResponse(Record):
    # Response record for the structured data diagnosis service.

    # Error details
    error = Error()

    # The operation that was performed
    operation = String()

    # Detected data type (for detect-type/diagnose) - optional
    detected_type = String()

    # Confidence score for type detection - optional
    confidence = Double()

    # JSON encoded descriptor (for generate-descriptor/diagnose) - optional
    descriptor = String()

    # JSON encoded additional metadata (e.g., field count, sample records),
    # held as a map with string values
    metadata = Map(String())
|
||||
|
||||
############################################################################
|
||||
Loading…
Add table
Add a link
Reference in a new issue