Messaging fabric plugins (#592)

* Plugin architecture for messaging fabric

* Schemas use a technology-neutral expression

* Schema strictness has uncovered some incorrect schema use, which is now fixed
This commit is contained in:
cybermaggedon 2025-12-17 21:40:43 +00:00 committed by GitHub
parent 1865b3f3c8
commit 34eb083836
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
100 changed files with 2342 additions and 828 deletions

View file

@ -1,16 +1,14 @@
from pulsar.schema import Record, String, Array
from dataclasses import dataclass, field
from .primitives import Triple
class Metadata(Record):
@dataclass
class Metadata:
# Source identifier
id = String()
id: str = ""
# Subgraph
metadata = Array(Triple())
metadata: list[Triple] = field(default_factory=list)
# Collection management
user = String()
collection = String()
user: str = ""
collection: str = ""

View file

@ -1,34 +1,39 @@
from pulsar.schema import Record, String, Boolean, Array, Integer
from dataclasses import dataclass, field
class Error(Record):
type = String()
message = String()
@dataclass
class Error:
type: str = ""
message: str = ""
class Value(Record):
value = String()
is_uri = Boolean()
type = String()
@dataclass
class Value:
value: str = ""
is_uri: bool = False
type: str = ""
class Triple(Record):
s = Value()
p = Value()
o = Value()
@dataclass
class Triple:
s: Value | None = None
p: Value | None = None
o: Value | None = None
class Field(Record):
name = String()
@dataclass
class Field:
name: str = ""
# int, string, long, bool, float, double, timestamp
type = String()
size = Integer()
primary = Boolean()
description = String()
type: str = ""
size: int = 0
primary: bool = False
description: str = ""
# NEW FIELDS for structured data:
required = Boolean() # Whether field is required
enum_values = Array(String()) # For enum type fields
indexed = Boolean() # Whether field should be indexed
required: bool = False # Whether field is required
enum_values: list[str] = field(default_factory=list) # For enum type fields
indexed: bool = False # Whether field should be indexed
class RowSchema(Record):
name = String()
description = String()
fields = Array(Field())
@dataclass
class RowSchema:
name: str = ""
description: str = ""
fields: list[Field] = field(default_factory=list)

View file

@ -1,4 +1,23 @@
def topic(topic, kind='persistent', tenant='tg', namespace='flow'):
return f"{kind}://{tenant}/{namespace}/{topic}"
def topic(
    queue_name: str,
    qos: str = 'q1',
    tenant: str = 'tg',
    namespace: str = 'flow',
) -> str:
    """
    Create a generic topic identifier that can be mapped by backends.

    The identifier is a plain slash-separated string; this function does
    not validate any of its arguments (in particular, ``qos`` is not
    checked against the values listed below).

    Args:
        queue_name: The queue/topic name
        qos: Quality of service
            - 'q0' = best-effort (no ack)
            - 'q1' = at-least-once (ack required)
            - 'q2' = exactly-once (two-phase ack)
        tenant: Tenant identifier for multi-tenancy
        namespace: Namespace within tenant

    Returns:
        Generic topic string: qos/tenant/namespace/queue_name

    Examples:
        topic('my-queue')
            # q1/tg/flow/my-queue
        topic('config', qos='q2', namespace='config')
            # q2/tg/config/config
    """
    return f"{qos}/{tenant}/{namespace}/{queue_name}"

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, Bytes
from dataclasses import dataclass
from ..core.metadata import Metadata
from ..core.topic import topic
@ -6,24 +6,27 @@ from ..core.topic import topic
############################################################################
# PDF docs etc.
class Document(Record):
metadata = Metadata()
data = Bytes()
@dataclass
class Document:
metadata: Metadata | None = None
data: bytes = b""
############################################################################
# Text documents / text from PDF
class TextDocument(Record):
metadata = Metadata()
text = Bytes()
@dataclass
class TextDocument:
metadata: Metadata | None = None
text: bytes = b""
############################################################################
# Chunks of text
class Chunk(Record):
metadata = Metadata()
chunk = Bytes()
@dataclass
class Chunk:
metadata: Metadata | None = None
chunk: bytes = b""
############################################################################
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double, Map
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.primitives import Value, RowSchema
@ -8,49 +8,55 @@ from ..core.topic import topic
# Graph embeddings are embeddings associated with a graph entity
class EntityEmbeddings(Record):
entity = Value()
vectors = Array(Array(Double()))
@dataclass
class EntityEmbeddings:
entity: Value | None = None
vectors: list[list[float]] = field(default_factory=list)
# This is a 'batching' mechanism for the above data
class GraphEmbeddings(Record):
metadata = Metadata()
entities = Array(EntityEmbeddings())
@dataclass
class GraphEmbeddings:
metadata: Metadata | None = None
entities: list[EntityEmbeddings] = field(default_factory=list)
############################################################################
# Document embeddings are embeddings associated with a chunk
class ChunkEmbeddings(Record):
chunk = Bytes()
vectors = Array(Array(Double()))
@dataclass
class ChunkEmbeddings:
chunk: bytes = b""
vectors: list[list[float]] = field(default_factory=list)
# This is a 'batching' mechanism for the above data
class DocumentEmbeddings(Record):
metadata = Metadata()
chunks = Array(ChunkEmbeddings())
@dataclass
class DocumentEmbeddings:
metadata: Metadata | None = None
chunks: list[ChunkEmbeddings] = field(default_factory=list)
############################################################################
# Object embeddings are embeddings associated with the primary key of an
# object
class ObjectEmbeddings(Record):
metadata = Metadata()
vectors = Array(Array(Double()))
name = String()
key_name = String()
id = String()
@dataclass
class ObjectEmbeddings:
metadata: Metadata | None = None
vectors: list[list[float]] = field(default_factory=list)
name: str = ""
key_name: str = ""
id: str = ""
############################################################################
# Structured object embeddings with enhanced capabilities
class StructuredObjectEmbedding(Record):
metadata = Metadata()
vectors = Array(Array(Double()))
schema_name = String()
object_id = String() # Primary key value
field_embeddings = Map(Array(Double())) # Per-field embeddings
@dataclass
class StructuredObjectEmbedding:
metadata: Metadata | None = None
vectors: list[list[float]] = field(default_factory=list)
schema_name: str = ""
object_id: str = "" # Primary key value
field_embeddings: dict[str, list[float]] = field(default_factory=dict) # Per-field embeddings
############################################################################
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Array
from dataclasses import dataclass, field
from ..core.primitives import Value, Triple
from ..core.metadata import Metadata
@ -8,21 +8,24 @@ from ..core.topic import topic
# Entity context are an entity associated with textual context
class EntityContext(Record):
entity = Value()
context = String()
@dataclass
class EntityContext:
entity: Value | None = None
context: str = ""
# This is a 'batching' mechanism for the above data
class EntityContexts(Record):
metadata = Metadata()
entities = Array(EntityContext())
@dataclass
class EntityContexts:
metadata: Metadata | None = None
entities: list[EntityContext] = field(default_factory=list)
############################################################################
# Graph triples
class Triples(Record):
metadata = Metadata()
triples = Array(Triple())
@dataclass
class Triples:
metadata: Metadata | None = None
triples: list[Triple] = field(default_factory=list)
############################################################################
############################################################################

View file

@ -1,5 +1,4 @@
from pulsar.schema import Record, Bytes, String, Array, Long, Boolean
from dataclasses import dataclass, field
from ..core.primitives import Triple, Error
from ..core.topic import topic
from ..core.metadata import Metadata
@ -22,40 +21,40 @@ from .embeddings import GraphEmbeddings
# <- ()
# <- (error)
class KnowledgeRequest(Record):
@dataclass
class KnowledgeRequest:
# get-kg-core, delete-kg-core, list-kg-cores, put-kg-core
# load-kg-core, unload-kg-core
operation = String()
operation: str = ""
# list-kg-cores, delete-kg-core, put-kg-core
user = String()
user: str = ""
# get-kg-core, list-kg-cores, delete-kg-core, put-kg-core,
# load-kg-core, unload-kg-core
id = String()
id: str = ""
# load-kg-core
flow = String()
flow: str = ""
# load-kg-core
collection = String()
collection: str = ""
# put-kg-core
triples = Triples()
graph_embeddings = GraphEmbeddings()
triples: Triples | None = None
graph_embeddings: GraphEmbeddings | None = None
class KnowledgeResponse(Record):
error = Error()
ids = Array(String())
eos = Boolean() # Indicates end of knowledge core stream
triples = Triples()
graph_embeddings = GraphEmbeddings()
@dataclass
class KnowledgeResponse:
error: Error | None = None
ids: list[str] = field(default_factory=list)
eos: bool = False # Indicates end of knowledge core stream
triples: Triples | None = None
graph_embeddings: GraphEmbeddings | None = None
knowledge_request_queue = topic(
'knowledge', kind='non-persistent', namespace='request'
'knowledge', qos='q0', namespace='request'
)
knowledge_response_queue = topic(
'knowledge', kind='non-persistent', namespace='response',
'knowledge', qos='q0', namespace='response',
)

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Boolean
from dataclasses import dataclass
from ..core.topic import topic
@ -6,21 +6,25 @@ from ..core.topic import topic
# NLP extraction data types
class Definition(Record):
name = String()
definition = String()
@dataclass
class Definition:
name: str = ""
definition: str = ""
class Topic(Record):
name = String()
definition = String()
@dataclass
class Topic:
name: str = ""
definition: str = ""
class Relationship(Record):
s = String()
p = String()
o = String()
o_entity = Boolean()
@dataclass
class Relationship:
s: str = ""
p: str = ""
o: str = ""
o_entity: bool = False
class Fact(Record):
s = String()
p = String()
o = String()
@dataclass
class Fact:
s: str = ""
p: str = ""
o: str = ""

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Double, Array
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.topic import topic
@ -7,11 +7,13 @@ from ..core.topic import topic
# Extracted object from text processing
class ExtractedObject(Record):
metadata = Metadata()
schema_name = String() # Which schema this object belongs to
values = Array(Map(String())) # Array of objects, each object is field name -> value
confidence = Double()
source_span = String() # Text span where object was found
@dataclass
class ExtractedObject:
metadata: Metadata | None = None
schema_name: str = "" # Which schema this object belongs to
values: list[dict[str, str]] = field(default_factory=list) # Array of objects, each object is field name -> value
confidence: float = 0.0
source_span: str = "" # Text span where object was found
############################################################################
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, Array, Map, String
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.primitives import RowSchema
@ -8,9 +8,10 @@ from ..core.topic import topic
# Stores rows of information
class Rows(Record):
metadata = Metadata()
row_schema = RowSchema()
rows = Array(Map(String()))
@dataclass
class Rows:
metadata: Metadata | None = None
row_schema: RowSchema | None = None
rows: list[dict[str, str]] = field(default_factory=list)
############################################################################
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Bytes, Map
from dataclasses import dataclass, field
from ..core.metadata import Metadata
from ..core.topic import topic
@ -7,11 +7,13 @@ from ..core.topic import topic
# Structured data submission for fire-and-forget processing
class StructuredDataSubmission(Record):
metadata = Metadata()
format = String() # "json", "csv", "xml"
schema_name = String() # Reference to schema in config
data = Bytes() # Raw data to ingest
options = Map(String()) # Format-specific options
@dataclass
class StructuredDataSubmission:
metadata: Metadata | None = None
format: str = "" # "json", "csv", "xml"
schema_name: str = "" # Reference to schema in config
data: bytes = b"" # Raw data to ingest
options: dict[str, str] = field(default_factory=dict) # Format-specific options
############################################################################
############################################################################

View file

@ -1,5 +1,5 @@
from pulsar.schema import Record, String, Array, Map, Boolean
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -8,33 +8,36 @@ from ..core.primitives import Error
# Agent service request/response schemas
class AgentStep(Record):
thought = String()
action = String()
arguments = Map(String())
observation = String()
user = String() # User context for the step
@dataclass
class AgentStep:
thought: str = ""
action: str = ""
arguments: dict[str, str] = field(default_factory=dict)
observation: str = ""
user: str = "" # User context for the step
class AgentRequest(Record):
question = String()
state = String()
group = Array(String())
history = Array(AgentStep())
user = String() # User context for multi-tenancy
streaming = Boolean() # NEW: Enable streaming response delivery (default false)
@dataclass
class AgentRequest:
question: str = ""
state: str = ""
group: list[str] | None = None
history: list[AgentStep] = field(default_factory=list)
user: str = "" # User context for multi-tenancy
streaming: bool = False # NEW: Enable streaming response delivery (default false)
class AgentResponse(Record):
@dataclass
class AgentResponse:
# Streaming-first design
chunk_type = String() # "thought", "action", "observation", "answer", "error"
content = String() # The actual content (interpretation depends on chunk_type)
end_of_message = Boolean() # Current chunk type (thought/action/etc.) is complete
end_of_dialog = Boolean() # Entire agent dialog is complete
chunk_type: str = "" # "thought", "action", "observation", "answer", "error"
content: str = "" # The actual content (interpretation depends on chunk_type)
end_of_message: bool = False # Current chunk type (thought/action/etc.) is complete
end_of_dialog: bool = False # Entire agent dialog is complete
# Legacy fields (deprecated but kept for backward compatibility)
answer = String()
error = Error()
thought = String()
observation = String()
answer: str = ""
error: Error | None = None
thought: str = ""
observation: str = ""
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Integer, Array
from dataclasses import dataclass, field
from datetime import datetime
from ..core.primitives import Error
@ -10,37 +10,40 @@ from ..core.topic import topic
# Collection metadata operations (for librarian service)
class CollectionMetadata(Record):
@dataclass
class CollectionMetadata:
"""Collection metadata record"""
user = String()
collection = String()
name = String()
description = String()
tags = Array(String())
user: str = ""
collection: str = ""
name: str = ""
description: str = ""
tags: list[str] = field(default_factory=list)
############################################################################
class CollectionManagementRequest(Record):
@dataclass
class CollectionManagementRequest:
"""Request for collection management operations"""
operation = String() # e.g., "delete-collection"
operation: str = "" # e.g., "delete-collection"
# For 'list-collections'
user = String()
collection = String()
timestamp = String() # ISO timestamp
name = String()
description = String()
tags = Array(String())
user: str = ""
collection: str = ""
timestamp: str = "" # ISO timestamp
name: str = ""
description: str = ""
tags: list[str] = field(default_factory=list)
# For list
tag_filter = Array(String()) # Optional filter by tags
limit = Integer()
tag_filter: list[str] = field(default_factory=list) # Optional filter by tags
limit: int = 0
class CollectionManagementResponse(Record):
@dataclass
class CollectionManagementResponse:
"""Response for collection management operations"""
error = Error() # Only populated if there's an error
timestamp = String() # ISO timestamp
collections = Array(CollectionMetadata())
error: Error | None = None # Only populated if there's an error
timestamp: str = "" # ISO timestamp
collections: list[CollectionMetadata] = field(default_factory=list)
############################################################################
@ -48,8 +51,9 @@ class CollectionManagementResponse(Record):
# Topics
collection_request_queue = topic(
'collection', kind='non-persistent', namespace='request'
'collection', qos='q0', namespace='request'
)
collection_response_queue = topic(
'collection', kind='non-persistent', namespace='response'
'collection', qos='q0', namespace='response'
)

View file

@ -1,5 +1,5 @@
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -13,58 +13,61 @@ from ..core.primitives import Error
# put(values) -> ()
# delete(keys) -> ()
# config() -> (version, config)
class ConfigKey(Record):
type = String()
key = String()
@dataclass
class ConfigKey:
type: str = ""
key: str = ""
class ConfigValue(Record):
type = String()
key = String()
value = String()
@dataclass
class ConfigValue:
type: str = ""
key: str = ""
value: str = ""
# Config service request/response schemas
class ConfigRequest(Record):
operation = String() # get, list, getvalues, delete, put, config
@dataclass
class ConfigRequest:
operation: str = "" # get, list, getvalues, delete, put, config
# get, delete
keys = Array(ConfigKey())
keys: list[ConfigKey] = field(default_factory=list)
# list, getvalues
type = String()
type: str = ""
# put
values = Array(ConfigValue())
class ConfigResponse(Record):
values: list[ConfigValue] = field(default_factory=list)
@dataclass
class ConfigResponse:
# get, list, getvalues, config
version = Integer()
version: int = 0
# get, getvalues
values = Array(ConfigValue())
values: list[ConfigValue] = field(default_factory=list)
# list
directory = Array(String())
directory: list[str] = field(default_factory=list)
# config
config = Map(Map(String()))
config: dict[str, dict[str, str]] = field(default_factory=dict)
# Everything
error = Error()
error: Error | None = None
class ConfigPush(Record):
version = Integer()
config = Map(Map(String()))
@dataclass
class ConfigPush:
version: int = 0
config: dict[str, dict[str, str]] = field(default_factory=dict)
config_request_queue = topic(
'config', kind='non-persistent', namespace='request'
'config', qos='q0', namespace='request'
)
config_response_queue = topic(
'config', kind='non-persistent', namespace='response'
'config', qos='q0', namespace='response'
)
config_push_queue = topic(
'config', kind='persistent', namespace='config'
'config', qos='q2', namespace='config'
)
############################################################################

View file

@ -1,33 +1,36 @@
from pulsar.schema import Record, String, Map, Double, Array
from dataclasses import dataclass, field
from ..core.primitives import Error
############################################################################
# Structured data diagnosis services
class StructuredDataDiagnosisRequest(Record):
operation = String() # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
sample = String() # Data sample to analyze (text content)
type = String() # Data type (csv, json, xml) - optional, required for generate-descriptor
schema_name = String() # Target schema name for descriptor generation - optional
@dataclass
class StructuredDataDiagnosisRequest:
operation: str = "" # "detect-type", "generate-descriptor", "diagnose", or "schema-selection"
sample: str = "" # Data sample to analyze (text content)
type: str = "" # Data type (csv, json, xml) - optional, required for generate-descriptor
schema_name: str = "" # Target schema name for descriptor generation - optional
# JSON encoded options (e.g., delimiter for CSV)
options = Map(String())
options: dict[str, str] = field(default_factory=dict)
class StructuredDataDiagnosisResponse(Record):
error = Error()
@dataclass
class StructuredDataDiagnosisResponse:
error: Error | None = None
operation = String() # The operation that was performed
detected_type = String() # Detected data type (for detect-type/diagnose) - optional
confidence = Double() # Confidence score for type detection - optional
operation: str = "" # The operation that was performed
detected_type: str = "" # Detected data type (for detect-type/diagnose) - optional
confidence: float = 0.0 # Confidence score for type detection - optional
# JSON encoded descriptor (for generate-descriptor/diagnose) - optional
descriptor = String()
descriptor: str = ""
# JSON encoded additional metadata (e.g., field count, sample records)
metadata = Map(String())
metadata: dict[str, str] = field(default_factory=dict)
# Array of matching schema IDs (for schema-selection operation) - optional
schema_matches = Array(String())
schema_matches: list[str] = field(default_factory=list)
############################################################################
############################################################################

View file

@ -1,5 +1,5 @@
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -11,61 +11,61 @@ from ..core.primitives import Error
# get_class(classname) -> (class)
# put_class(class) -> (class)
# delete_class(classname) -> ()
#
#
# list_flows() -> (flowid[])
# get_flow(flowid) -> (flow)
# start_flow(flowid, classname) -> ()
# stop_flow(flowid) -> ()
# Flow service request/response schemas
class FlowRequest(Record):
operation = String() # list-classes, get-class, put-class, delete-class
@dataclass
class FlowRequest:
operation: str = "" # list-classes, get-class, put-class, delete-class
# list-flows, get-flow, start-flow, stop-flow
# get_class, put_class, delete_class, start_flow
class_name = String()
class_name: str = ""
# put_class
class_definition = String()
class_definition: str = ""
# start_flow
description = String()
description: str = ""
# get_flow, start_flow, stop_flow
flow_id = String()
flow_id: str = ""
# start_flow - optional parameters for flow customization
parameters = Map(String())
class FlowResponse(Record):
parameters: dict[str, str] = field(default_factory=dict)
@dataclass
class FlowResponse:
# list_classes
class_names = Array(String())
class_names: list[str] = field(default_factory=list)
# list_flows
flow_ids = Array(String())
flow_ids: list[str] = field(default_factory=list)
# get_class
class_definition = String()
class_definition: str = ""
# get_flow
flow = String()
flow: str = ""
# get_flow
description = String()
description: str = ""
# get_flow - parameters used when flow was started
parameters = Map(String())
parameters: dict[str, str] = field(default_factory=dict)
# Everything
error = Error()
error: Error | None = None
flow_request_queue = topic(
'flow', kind='non-persistent', namespace='request'
'flow', qos='q0', namespace='request'
)
flow_response_queue = topic(
'flow', kind='non-persistent', namespace='response'
'flow', qos='q0', namespace='response'
)
############################################################################

View file

@ -1,9 +1,8 @@
from pulsar.schema import Record, Bytes, String, Array, Long
from dataclasses import dataclass, field
from ..core.primitives import Triple, Error
from ..core.topic import topic
from ..core.metadata import Metadata
from ..knowledge.document import Document, TextDocument
# Note: Document imports will be updated after knowledge schemas are converted
# add-document
# -> (document_id, document_metadata, content)
@ -50,76 +49,79 @@ from ..knowledge.document import Document, TextDocument
# <- (processing_metadata[])
# <- (error)
class DocumentMetadata(Record):
id = String()
time = Long()
kind = String()
title = String()
comments = String()
metadata = Array(Triple())
user = String()
tags = Array(String())
@dataclass
class DocumentMetadata:
id: str = ""
time: int = 0
kind: str = ""
title: str = ""
comments: str = ""
metadata: list[Triple] = field(default_factory=list)
user: str = ""
tags: list[str] = field(default_factory=list)
class ProcessingMetadata(Record):
id = String()
document_id = String()
time = Long()
flow = String()
user = String()
collection = String()
tags = Array(String())
@dataclass
class ProcessingMetadata:
id: str = ""
document_id: str = ""
time: int = 0
flow: str = ""
user: str = ""
collection: str = ""
tags: list[str] = field(default_factory=list)
class Criteria(Record):
key = String()
value = String()
operator = String()
class LibrarianRequest(Record):
@dataclass
class Criteria:
key: str = ""
value: str = ""
operator: str = ""
@dataclass
class LibrarianRequest:
# add-document, remove-document, update-document, get-document-metadata,
# get-document-content, add-processing, remove-processing, list-documents,
# list-processing
operation = String()
operation: str = ""
# add-document, remove-document, update-document, get-document-metadata,
# get-document-content
document_id = String()
document_id: str = ""
# add-processing, remove-processing
processing_id = String()
processing_id: str = ""
# add-document, update-document
document_metadata = DocumentMetadata()
document_metadata: DocumentMetadata | None = None
# add-processing
processing_metadata = ProcessingMetadata()
processing_metadata: ProcessingMetadata | None = None
# add-document
content = Bytes()
content: bytes = b""
# list-documents, list-processing
user = String()
user: str = ""
# list-documents?, list-processing?
collection = String()
collection: str = ""
#
criteria = Array(Criteria())
#
criteria: list[Criteria] = field(default_factory=list)
class LibrarianResponse(Record):
error = Error()
document_metadata = DocumentMetadata()
content = Bytes()
document_metadatas = Array(DocumentMetadata())
processing_metadatas = Array(ProcessingMetadata())
@dataclass
class LibrarianResponse:
error: Error | None = None
document_metadata: DocumentMetadata | None = None
content: bytes = b""
document_metadatas: list[DocumentMetadata] = field(default_factory=list)
processing_metadatas: list[ProcessingMetadata] = field(default_factory=list)
# FIXME: Is this right? Using persistence on librarian so that
# message chunking works
librarian_request_queue = topic(
'librarian', kind='persistent', namespace='request'
'librarian', qos='q1', namespace='request'
)
librarian_response_queue = topic(
'librarian', kind='persistent', namespace='response',
'librarian', qos='q1', namespace='response',
)

View file

@ -1,5 +1,5 @@
from pulsar.schema import Record, String, Array, Double, Integer, Boolean
from dataclasses import dataclass, field
from ..core.topic import topic
from ..core.primitives import Error
@ -8,46 +8,49 @@ from ..core.primitives import Error
# LLM text completion
class TextCompletionRequest(Record):
system = String()
prompt = String()
streaming = Boolean() # Default false for backward compatibility
@dataclass
class TextCompletionRequest:
system: str = ""
prompt: str = ""
streaming: bool = False # Default false for backward compatibility
class TextCompletionResponse(Record):
error = Error()
response = String()
in_token = Integer()
out_token = Integer()
model = String()
end_of_stream = Boolean() # Indicates final message in stream
@dataclass
class TextCompletionResponse:
error: Error | None = None
response: str = ""
in_token: int = 0
out_token: int = 0
model: str = ""
end_of_stream: bool = False # Indicates final message in stream
############################################################################
# Embeddings
class EmbeddingsRequest(Record):
text = String()
@dataclass
class EmbeddingsRequest:
text: str = ""
class EmbeddingsResponse(Record):
error = Error()
vectors = Array(Array(Double()))
@dataclass
class EmbeddingsResponse:
error: Error | None = None
vectors: list[list[float]] = field(default_factory=list)
############################################################################
# Tool request/response
class ToolRequest(Record):
name = String()
@dataclass
class ToolRequest:
name: str = ""
# Parameters are JSON encoded
parameters = String()
class ToolResponse(Record):
error = Error()
parameters: str = ""
@dataclass
class ToolResponse:
error: Error | None = None
# Plain text aka "unstructured"
text = String()
text: str = ""
# JSON-encoded object aka "structured"
object = String()
object: str = ""

View file

@ -1,5 +1,4 @@
from pulsar.schema import Record, String
from dataclasses import dataclass
from ..core.primitives import Error, Value, Triple
from ..core.topic import topic
@ -9,13 +8,14 @@ from ..core.metadata import Metadata
# Lookups
class LookupRequest(Record):
kind = String()
term = String()
@dataclass
class LookupRequest:
kind: str = ""
term: str = ""
class LookupResponse(Record):
text = String()
error = Error()
@dataclass
class LookupResponse:
text: str = ""
error: Error | None = None
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Array, Map, Integer, Double
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -7,15 +7,18 @@ from ..core.topic import topic
# NLP to Structured Query Service - converts natural language to GraphQL
class QuestionToStructuredQueryRequest(Record):
question = String()
max_results = Integer()
@dataclass
class QuestionToStructuredQueryRequest:
question: str = ""
max_results: int = 0
class QuestionToStructuredQueryResponse(Record):
error = Error()
graphql_query = String() # Generated GraphQL query
variables = Map(String()) # GraphQL variables if any
detected_schemas = Array(String()) # Which schemas the query targets
confidence = Double()
@dataclass
class QuestionToStructuredQueryResponse:
error: Error | None = None
graphql_query: str = "" # Generated GraphQL query
variables: dict[str, str] = field(default_factory=dict) # GraphQL variables if any
detected_schemas: list[str] = field(default_factory=list) # Which schemas the query targets
confidence: float = 0.0
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Array
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -7,22 +7,25 @@ from ..core.topic import topic
# Objects Query Service - executes GraphQL queries against structured data
class GraphQLError(Record):
message = String()
path = Array(String()) # Path to the field that caused the error
extensions = Map(String()) # Additional error metadata
@dataclass
class GraphQLError:
message: str = ""
path: list[str] = field(default_factory=list) # Path to the field that caused the error
extensions: dict[str, str] = field(default_factory=dict) # Additional error metadata
class ObjectsQueryRequest(Record):
user = String() # Cassandra keyspace (follows pattern from TriplesQueryRequest)
collection = String() # Data collection identifier (required for partition key)
query = String() # GraphQL query string
variables = Map(String()) # GraphQL variables
operation_name = String() # Operation to execute for multi-operation documents
@dataclass
class ObjectsQueryRequest:
    """Request for the Objects Query service.

    The service executes GraphQL queries against structured data.  Every
    field has a default, so an instance can be created with no arguments.
    """

    # Cassandra keyspace (follows pattern from TriplesQueryRequest)
    user: str = ""
    # Data collection identifier (required for partition key)
    collection: str = ""
    # GraphQL query string
    query: str = ""
    # GraphQL variables
    variables: dict[str, str] = field(default_factory=dict)
    # Operation to execute for multi-operation documents
    operation_name: str = ""
class ObjectsQueryResponse(Record):
error = Error() # System-level error (connection, timeout, etc.)
data = String() # JSON-encoded GraphQL response data
errors = Array(GraphQLError()) # GraphQL field-level errors
extensions = Map(String()) # Query metadata (execution time, etc.)
@dataclass
class ObjectsQueryResponse:
    """Response from the Objects Query service.

    System-level failures are carried in ``error``; GraphQL field-level
    failures are carried separately in ``errors``.
    """

    # System-level error (connection, timeout, etc.)
    error: Error | None = None
    # JSON-encoded GraphQL response data
    data: str = ""
    # GraphQL field-level errors
    errors: list[GraphQLError] = field(default_factory=list)
    # Query metadata (execution time, etc.)
    extensions: dict[str, str] = field(default_factory=dict)
############################################################################
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Boolean
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -18,27 +18,28 @@ from ..core.topic import topic
# extract-rows
# schema, chunk -> rows
class PromptRequest(Record):
id = String()
@dataclass
class PromptRequest:
id: str = ""
# JSON encoded values
terms = Map(String())
terms: dict[str, str] = field(default_factory=dict)
# Streaming support (default false for backward compatibility)
streaming = Boolean()
class PromptResponse(Record):
streaming: bool = False
@dataclass
class PromptResponse:
# Error case
error = Error()
error: Error | None = None
# Just plain text
text = String()
text: str = ""
# JSON encoded
object = String()
object: str = ""
# Indicates final message in stream
end_of_stream = Boolean()
end_of_stream: bool = False
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Integer, Array, Double
from dataclasses import dataclass, field
from ..core.primitives import Error, Value, Triple
from ..core.topic import topic
@ -7,49 +7,55 @@ from ..core.topic import topic
# Graph embeddings query
class GraphEmbeddingsRequest(Record):
vectors = Array(Array(Double()))
limit = Integer()
user = String()
collection = String()
@dataclass
class GraphEmbeddingsRequest:
    """Request message for a graph embeddings query.

    Every field has a default, so an instance can be created with no
    arguments.
    """

    # A list of vectors, each itself a list of floats; a fresh empty list
    # is created per instance
    vectors: list[list[float]] = field(default_factory=list)
    # NOTE(review): presumably the maximum number of results - confirm
    limit: int = 0
    user: str = ""
    collection: str = ""
class GraphEmbeddingsResponse(Record):
error = Error()
entities = Array(Value())
@dataclass
class GraphEmbeddingsResponse:
    """Response message for a graph embeddings query."""

    # Error, if any; None by default
    error: Error | None = None
    # Result entities; a fresh empty list is created per instance
    entities: list[Value] = field(default_factory=list)
############################################################################
# Graph triples query
class TriplesQueryRequest(Record):
user = String()
collection = String()
s = Value()
p = Value()
o = Value()
limit = Integer()
@dataclass
class TriplesQueryRequest:
    """Request message for a graph triples query.

    Every field has a default, so an instance can be created with no
    arguments.
    """

    user: str = ""
    collection: str = ""
    # s, p and o carry the same names and types as the fields of Triple and
    # default to None.
    # NOTE(review): presumably None leaves that position unconstrained -
    # confirm against the query implementation
    s: Value | None = None
    p: Value | None = None
    o: Value | None = None
    # NOTE(review): presumably the maximum number of triples - confirm
    limit: int = 0
class TriplesQueryResponse(Record):
error = Error()
triples = Array(Triple())
@dataclass
class TriplesQueryResponse:
    """Response message for a graph triples query."""

    # Error, if any; None by default
    error: Error | None = None
    # Result triples; a fresh empty list is created per instance
    triples: list[Triple] = field(default_factory=list)
############################################################################
# Doc embeddings query
class DocumentEmbeddingsRequest(Record):
vectors = Array(Array(Double()))
limit = Integer()
user = String()
collection = String()
@dataclass
class DocumentEmbeddingsRequest:
    """Request message for a doc embeddings query.

    Every field has a default, so an instance can be created with no
    arguments.
    """

    # A list of vectors, each itself a list of floats; a fresh empty list
    # is created per instance
    vectors: list[list[float]] = field(default_factory=list)
    # NOTE(review): presumably the maximum number of results - confirm
    limit: int = 0
    user: str = ""
    collection: str = ""
class DocumentEmbeddingsResponse(Record):
error = Error()
chunks = Array(String())
@dataclass
class DocumentEmbeddingsResponse:
    """Response message for a doc embeddings query."""

    # Error, if any; None by default
    error: Error | None = None
    # Result chunks as strings; a fresh empty list is created per instance
    chunks: list[str] = field(default_factory=list)
document_embeddings_request_queue = topic(
"non-persistent://trustgraph/document-embeddings-request"
"document-embeddings-request", qos='q0', tenant='trustgraph', namespace='flow'
)
document_embeddings_response_queue = topic(
"non-persistent://trustgraph/document-embeddings-response"
"document-embeddings-response", qos='q0', tenant='trustgraph', namespace='flow'
)

View file

@ -1,5 +1,4 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
from dataclasses import dataclass
from ..core.topic import topic
from ..core.primitives import Error, Value
@ -7,36 +6,37 @@ from ..core.primitives import Error, Value
# Graph RAG text retrieval
class GraphRagQuery(Record):
query = String()
user = String()
collection = String()
entity_limit = Integer()
triple_limit = Integer()
max_subgraph_size = Integer()
max_path_length = Integer()
streaming = Boolean()
@dataclass
class GraphRagQuery:
    """Query message for Graph RAG text retrieval.

    Every field has a default, so an instance can be created with no
    arguments.
    """

    query: str = ""
    user: str = ""
    collection: str = ""
    # Integer settings, all defaulting to 0.
    # NOTE(review): presumably 0 lets the service apply its own defaults -
    # confirm against the Graph RAG implementation
    entity_limit: int = 0
    triple_limit: int = 0
    max_subgraph_size: int = 0
    max_path_length: int = 0
    # Streaming flag; off by default
    streaming: bool = False
class GraphRagResponse(Record):
error = Error()
response = String()
chunk = String()
end_of_stream = Boolean()
@dataclass
class GraphRagResponse:
    """Response message for Graph RAG text retrieval."""

    # Error, if any; None by default
    error: Error | None = None
    response: str = ""
    # NOTE(review): presumably marks the final message of a streamed
    # response - confirm against the producer
    end_of_stream: bool = False
############################################################################
# Document RAG text retrieval
class DocumentRagQuery(Record):
query = String()
user = String()
collection = String()
doc_limit = Integer()
streaming = Boolean()
class DocumentRagResponse(Record):
error = Error()
response = String()
chunk = String()
end_of_stream = Boolean()
@dataclass
class DocumentRagQuery:
    """Query message for Document RAG text retrieval.

    Every field has a default, so an instance can be created with no
    arguments.
    """

    query: str = ""
    user: str = ""
    collection: str = ""
    # NOTE(review): presumably the maximum number of documents to use -
    # confirm against the Document RAG implementation
    doc_limit: int = 0
    # Streaming flag; off by default
    streaming: bool = False
@dataclass
class DocumentRagResponse:
    """Response message for Document RAG text retrieval."""

    # Error, if any; None by default
    error: Error | None = None
    response: str = ""
    # NOTE(review): presumably marks the final message of a streamed
    # response - confirm against the producer
    end_of_stream: bool = False

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String
from dataclasses import dataclass
from ..core.primitives import Error
from ..core.topic import topic
@ -7,15 +7,17 @@ from ..core.topic import topic
# Storage management operations
class StorageManagementRequest(Record):
@dataclass
class StorageManagementRequest:
"""Request for storage management operations sent to store processors"""
operation = String() # e.g., "delete-collection"
user = String()
collection = String()
operation: str = "" # e.g., "delete-collection"
user: str = ""
collection: str = ""
class StorageManagementResponse(Record):
@dataclass
class StorageManagementResponse:
"""Response from storage processors for management operations"""
error = Error() # Only populated if there's an error, if null success
error: Error | None = None # Only populated if there's an error, if null success
############################################################################
@ -23,20 +25,21 @@ class StorageManagementResponse(Record):
# Topics for sending collection management requests to different storage types
vector_storage_management_topic = topic(
'vector-storage-management', kind='non-persistent', namespace='request'
'vector-storage-management', qos='q0', namespace='request'
)
object_storage_management_topic = topic(
'object-storage-management', kind='non-persistent', namespace='request'
'object-storage-management', qos='q0', namespace='request'
)
triples_storage_management_topic = topic(
'triples-storage-management', kind='non-persistent', namespace='request'
'triples-storage-management', qos='q0', namespace='request'
)
# Topic for receiving responses from storage processors
storage_management_response_topic = topic(
'storage-management', kind='non-persistent', namespace='response'
'storage-management', qos='q0', namespace='response'
)
############################################################################

View file

@ -1,4 +1,4 @@
from pulsar.schema import Record, String, Map, Array
from dataclasses import dataclass, field
from ..core.primitives import Error
from ..core.topic import topic
@ -7,14 +7,17 @@ from ..core.topic import topic
# Structured Query Service - executes GraphQL queries
class StructuredQueryRequest(Record):
question = String()
user = String() # Cassandra keyspace identifier
collection = String() # Data collection identifier
@dataclass
class StructuredQueryRequest:
    """Request for the Structured Query service.

    Every field has a default, so an instance can be created with no
    arguments.
    """

    question: str = ""
    # Cassandra keyspace identifier
    user: str = ""
    # Data collection identifier
    collection: str = ""
class StructuredQueryResponse(Record):
error = Error()
data = String() # JSON-encoded GraphQL response data
errors = Array(String()) # GraphQL errors if any
@dataclass
class StructuredQueryResponse:
    """Response from the Structured Query service."""

    # Error, if any; None by default
    error: Error | None = None
    # JSON-encoded GraphQL response data
    data: str = ""
    # GraphQL errors if any; a fresh empty list is created per instance
    errors: list[str] = field(default_factory=list)
############################################################################