Schema structure refactor (#451)

* Write schema refactor spec

* Implemented schema refactor spec
This commit is contained in:
cybermaggedon 2025-08-04 21:42:57 +01:00 committed by GitHub
parent f4733021c5
commit 5de56c5dbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
27 changed files with 370 additions and 223 deletions

View file

@ -0,0 +1,9 @@
from .llm import *
from .retrieval import *
from .query import *
from .agent import *
from .flow import *
from .prompt import *
from .config import *
from .library import *
from .lookup import *

View file

@ -0,0 +1,30 @@
from pulsar.schema import Record, String, Array, Map
from ..core.topic import topic
from ..core.primitives import Error
############################################################################
# Agent service: request/response records and the per-step history entry
class AgentStep(Record):
    """Pulsar schema record for one entry of an agent history.

    Used as the element type of ``AgentRequest.history``.
    """
    # NOTE(review): presumably one thought/action/observation iteration of
    # the agent loop -- confirm against the agent implementation.
    thought = String()
    action = String()
    # Map with String values.
    arguments = Map(String())
    observation = String()
class AgentRequest(Record):
    """Pulsar schema record for the request half of the agent exchange."""
    question = String()
    # NOTE(review): the content/encoding expected in `plan` and `state` is
    # not visible in this schema -- confirm with the agent implementation.
    plan = String()
    state = String()
    # Array of AgentStep records.
    history = Array(AgentStep())
class AgentResponse(Record):
    """Pulsar schema record for the response half of the agent exchange."""
    # NOTE(review): which of these fields are populated together (e.g.
    # intermediate thought/observation vs. final answer) is not defined
    # here -- confirm with the agent implementation.
    answer = String()
    # Project-level Error record (from ..core.primitives).
    error = Error()
    thought = String()
    observation = String()
############################################################################

View file

@ -0,0 +1,71 @@
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
from ..core.topic import topic
from ..core.primitives import Error
############################################################################
# Config service:
# get(keys) -> (version, values)
# list(type) -> (version, values)
# getvalues(type) -> (version, values)
# put(values) -> ()
# delete(keys) -> ()
# config() -> (version, config)
class ConfigKey(Record):
    """Pulsar schema record addressing a config item by type and key.

    Used as the element type of ``ConfigRequest.keys``.
    """
    type = String()
    key = String()
class ConfigValue(Record):
    """Pulsar schema record holding a config item: type, key and value.

    Used as the element type of ``ConfigRequest.values`` and
    ``ConfigResponse.values``.
    """
    type = String()
    key = String()
    # NOTE(review): value is a plain string; any encoding (e.g. JSON) is not
    # specified here -- confirm with the config service.
    value = String()
# Config service request/response records
class ConfigRequest(Record):
    """Pulsar schema record for a config service request.

    ``operation`` names the operation; the comment above each remaining
    field lists the operations that field applies to.
    """
    operation = String() # get, list, getvalues, delete, put, config
    # get, delete
    keys = Array(ConfigKey())
    # list, getvalues
    type = String()
    # put
    values = Array(ConfigValue())
class ConfigResponse(Record):
    """Pulsar schema record for a config service response.

    The comment above each field lists the operations whose response
    carries that field.
    """
    # get, list, getvalues, config
    version = Integer()
    # get, getvalues
    values = Array(ConfigValue())
    # list
    directory = Array(String())
    # config
    # Nested map with String leaf values.
    # NOTE(review): presumably type -> key -> value -- confirm.
    config = Map(Map(String()))
    # Everything
    error = Error()
class ConfigPush(Record):
    """Pulsar schema record carrying a config version and a nested config map."""
    # NOTE(review): presumably the message type published on
    # config_push_queue -- no usage is visible here, confirm with the
    # config service.
    version = Integer()
    # Nested map with String leaf values, same shape as ConfigResponse.config.
    config = Map(Map(String()))
# Topic names for the config service, built with the project `topic` helper.
# Request and response use kind='non-persistent'; the push topic uses
# kind='persistent' in its own 'config' namespace.
config_request_queue = topic(
    'config', kind='non-persistent', namespace='request'
)
config_response_queue = topic(
    'config', kind='non-persistent', namespace='response'
)
config_push_queue = topic(
    'config', kind='persistent', namespace='config'
)
############################################################################

View file

@ -0,0 +1,66 @@
from pulsar.schema import Record, Bytes, String, Boolean, Array, Map, Integer
from ..core.topic import topic
from ..core.primitives import Error
############################################################################
# Flow service:
# list_classes() -> (classname[])
# get_class(classname) -> (class)
# put_class(class) -> (class)
# delete_class(classname) -> ()
#
# list_flows() -> (flowid[])
# get_flow(flowid) -> (flow)
# start_flow(flowid, classname) -> ()
# stop_flow(flowid) -> ()
# Flow service request/response records
class FlowRequest(Record):
    """Pulsar schema record for a flow service request.

    ``operation`` names the operation; the comment above each remaining
    field lists the operations that field applies to.
    """
    # NOTE(review): the operation names listed next to `operation` are
    # hyphenated (get-class) while the per-field comments use underscores
    # (get_class) -- confirm which spelling is used on the wire.
    operation = String() # list-classes, get-class, put-class, delete-class
    # list-flows, get-flow, start-flow, stop-flow
    # get_class, put_class, delete_class, start_flow
    class_name = String()
    # put_class
    class_definition = String()
    # start_flow
    description = String()
    # get_flow, start_flow, stop_flow
    flow_id = String()
class FlowResponse(Record):
    """Pulsar schema record for a flow service response.

    The comment above each field names the operation whose response
    carries that field.
    """
    # list_classes
    class_names = Array(String())
    # list_flows
    flow_ids = Array(String())
    # get_class
    class_definition = String()
    # get_flow
    # NOTE(review): encoding of the flow string is not specified here.
    flow = String()
    # get_flow
    description = String()
    # Everything
    error = Error()
# Topic names for the flow service, built with the project `topic` helper;
# both request and response use kind='non-persistent'.
flow_request_queue = topic(
    'flow', kind='non-persistent', namespace='request'
)
flow_response_queue = topic(
    'flow', kind='non-persistent', namespace='response'
)
############################################################################

View file

@ -0,0 +1,125 @@
from pulsar.schema import Record, Bytes, String, Array, Long
from ..core.primitives import Triple, Error
from ..core.topic import topic
from ..core.metadata import Metadata
from ..knowledge.document import Document, TextDocument
# add-document
# -> (document_id, document_metadata, content)
# <- ()
# <- (error)
# remove-document
# -> (document_id)
# <- ()
# <- (error)
# update-document
# -> (document_id, document_metadata)
# <- ()
# <- (error)
# get-document-metadata
# -> (document_id)
# <- (document_metadata)
# <- (error)
# get-document-content
# -> (document_id)
# <- (content)
# <- (error)
# add-processing
# -> (processing_id, processing_metadata)
# <- ()
# <- (error)
# remove-processing
# -> (processing_id)
# <- ()
# <- (error)
# list-documents
# -> (user, collection?)
# <- (document_metadata[])
# <- (error)
# list-processing
# -> (user, collection?)
# <- (processing_metadata[])
# <- (error)
class DocumentMetadata(Record):
    """Pulsar schema record describing a document held by the librarian.

    Used by ``LibrarianRequest.document_metadata`` and by the
    ``document_metadata`` / ``document_metadatas`` response fields.
    """
    id = String()
    # NOTE(review): Long timestamp; unit/epoch not specified here -- confirm.
    time = Long()
    kind = String()
    title = String()
    comments = String()
    # Array of project-level Triple records (from ..core.primitives).
    metadata = Array(Triple())
    user = String()
    tags = Array(String())
class ProcessingMetadata(Record):
    """Pulsar schema record describing a processing entry in the librarian.

    Used by ``LibrarianRequest.processing_metadata`` and
    ``LibrarianResponse.processing_metadatas``.
    """
    id = String()
    # NOTE(review): presumably the id of the DocumentMetadata being
    # processed -- confirm.
    document_id = String()
    # NOTE(review): Long timestamp; unit/epoch not specified here -- confirm.
    time = Long()
    flow = String()
    user = String()
    collection = String()
    tags = Array(String())
class Criteria(Record):
    """Pulsar schema record with key, value and operator strings.

    Used as the element type of ``LibrarianRequest.criteria``.
    """
    # NOTE(review): the set of valid `operator` values is not defined in
    # this schema -- confirm with the librarian implementation.
    key = String()
    value = String()
    operator = String()
class LibrarianRequest(Record):
    """Pulsar schema record for a librarian service request.

    ``operation`` names the operation; the comment above each remaining
    field lists the operations that field applies to.
    """
    # add-document, remove-document, update-document, get-document-metadata,
    # get-document-content, add-processing, remove-processing, list-documents,
    # list-processing
    operation = String()
    # add-document, remove-document, update-document, get-document-metadata,
    # get-document-content
    document_id = String()
    # add-processing, remove-processing
    processing_id = String()
    # add-document, update-document
    document_metadata = DocumentMetadata()
    # add-processing
    processing_metadata = ProcessingMetadata()
    # add-document
    content = Bytes()
    # list-documents, list-processing
    user = String()
    # list-documents?, list-processing?
    collection = String()
    # NOTE(review): no operation is documented for `criteria`; presumably a
    # filter for the list operations -- confirm.
    criteria = Array(Criteria())
class LibrarianResponse(Record):
    """Pulsar schema record for a librarian service response."""
    # NOTE(review): per-operation field usage is not annotated here; going
    # by the operation summary at the top of this file, presumably:
    # get-document-metadata -> document_metadata, get-document-content ->
    # content, list-documents -> document_metadatas, list-processing ->
    # processing_metadatas -- confirm.
    error = Error()
    document_metadata = DocumentMetadata()
    content = Bytes()
    document_metadatas = Array(DocumentMetadata())
    processing_metadatas = Array(ProcessingMetadata())
# FIXME: Is this right? Using persistence on librarian so that
# message chunking works
# Topic names for the librarian service, built with the project `topic`
# helper; unlike the config/flow request/response topics, both of these use
# kind='persistent'.
librarian_request_queue = topic(
    'librarian', kind='persistent', namespace='request'
)
librarian_response_queue = topic(
    'librarian', kind='persistent', namespace='response',
)

View file

@ -0,0 +1,51 @@
from pulsar.schema import Record, String, Array, Double, Integer
from ..core.topic import topic
from ..core.primitives import Error
############################################################################
# LLM text completion
class TextCompletionRequest(Record):
    """Pulsar schema record for an LLM text completion request."""
    # NOTE(review): presumably the system prompt and the user prompt --
    # confirm with the text completion service.
    system = String()
    prompt = String()
class TextCompletionResponse(Record):
    """Pulsar schema record for an LLM text completion response."""
    error = Error()
    response = String()
    # NOTE(review): presumably input/output token counts -- confirm.
    in_token = Integer()
    out_token = Integer()
    model = String()
############################################################################
# Embeddings
class EmbeddingsRequest(Record):
    """Pulsar schema record for an embeddings request: a single text string."""
    text = String()
class EmbeddingsResponse(Record):
    """Pulsar schema record for an embeddings response."""
    error = Error()
    # Array of arrays of doubles.
    # NOTE(review): why several vectors may be returned for one text is not
    # stated here -- confirm with the embeddings service.
    vectors = Array(Array(Double()))
############################################################################
# Tool request/response
class ToolRequest(Record):
    """Pulsar schema record for a tool invocation request."""
    name = String()
    # Parameters are JSON encoded
    parameters = String()
class ToolResponse(Record):
    """Pulsar schema record for a tool invocation response.

    Carries an error plus a plain-text field and a JSON-encoded field.
    """
    error = Error()
    # Plain text aka "unstructured"
    text = String()
    # JSON-encoded object aka "structured"
    object = String()

View file

@ -0,0 +1,21 @@
from pulsar.schema import Record, String
from ..core.primitives import Error, Value, Triple
from ..core.topic import topic
from ..core.metadata import Metadata
############################################################################
# Lookups
class LookupRequest(Record):
    """Pulsar schema record for a lookup request."""
    # NOTE(review): the valid values of `kind` are not defined in this
    # schema -- confirm with the lookup service.
    kind = String()
    term = String()
class LookupResponse(Record):
    """Pulsar schema record for a lookup response: result text or an error."""
    text = String()
    error = Error()
############################################################################

View file

@ -0,0 +1,38 @@
from pulsar.schema import Record, String, Map
from ..core.primitives import Error
from ..core.topic import topic
############################################################################
# Prompt services, abstract the prompt generation
# extract-definitions:
# chunk -> definitions
# extract-relationships:
# chunk -> relationships
# kg-prompt:
# query, triples -> answer
# document-prompt:
# query, documents -> answer
# extract-rows
# schema, chunk -> rows
class PromptRequest(Record):
    """Pulsar schema record for a prompt service request."""
    # NOTE(review): presumably one of the prompt ids listed in the comment
    # block above this class (extract-definitions, kg-prompt, ...) -- confirm.
    id = String()
    # JSON encoded values
    terms = Map(String())
class PromptResponse(Record):
    """Pulsar schema record for a prompt service response.

    Carries an error plus a plain-text field and a JSON-encoded field.
    """
    # Error case
    error = Error()
    # Just plain text
    text = String()
    # JSON encoded
    object = String()
############################################################################

View file

@ -0,0 +1,48 @@
from pulsar.schema import Record, String, Integer, Array, Double
from ..core.primitives import Error, Value, Triple
from ..core.topic import topic
############################################################################
# Graph embeddings query
class GraphEmbeddingsRequest(Record):
    """Pulsar schema record for a graph embeddings query."""
    # Array of arrays of doubles.
    vectors = Array(Array(Double()))
    # NOTE(review): presumably the maximum number of entities to return --
    # confirm.
    limit = Integer()
    user = String()
    collection = String()
class GraphEmbeddingsResponse(Record):
    """Pulsar schema record for a graph embeddings query response."""
    error = Error()
    # Array of project-level Value records (from ..core.primitives).
    entities = Array(Value())
############################################################################
# Graph triples query
class TriplesQueryRequest(Record):
    """Pulsar schema record for a graph triples query."""
    user = String()
    collection = String()
    # NOTE(review): presumably subject / predicate / object match terms,
    # with unset terms acting as wildcards -- confirm with the triples
    # query service.
    s = Value()
    p = Value()
    o = Value()
    # NOTE(review): presumably the maximum number of triples to return.
    limit = Integer()
class TriplesQueryResponse(Record):
    """Pulsar schema record for a graph triples query response."""
    error = Error()
    # Array of project-level Triple records (from ..core.primitives).
    triples = Array(Triple())
############################################################################
# Doc embeddings query
class DocumentEmbeddingsRequest(Record):
    """Pulsar schema record for a document embeddings query.

    Has the same field layout as ``GraphEmbeddingsRequest``.
    """
    # Array of arrays of doubles.
    vectors = Array(Array(Double()))
    # NOTE(review): presumably the maximum number of chunks to return --
    # confirm.
    limit = Integer()
    user = String()
    collection = String()
class DocumentEmbeddingsResponse(Record):
    """Pulsar schema record for a document embeddings query response."""
    error = Error()
    # Array of strings.
    # NOTE(review): presumably the matching chunk texts -- confirm.
    chunks = Array(String())

View file

@ -0,0 +1,36 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
from ..core.topic import topic
from ..core.primitives import Error, Value
############################################################################
# Graph RAG text retrieval
class GraphRagQuery(Record):
    """Pulsar schema record for a Graph RAG text retrieval query."""
    query = String()
    user = String()
    collection = String()
    # Integer tuning parameters for the retrieval.
    # NOTE(review): their exact semantics and defaults are defined by the
    # Graph RAG service, not by this schema -- confirm there.
    entity_limit = Integer()
    triple_limit = Integer()
    max_subgraph_size = Integer()
    max_path_length = Integer()
class GraphRagResponse(Record):
    """Pulsar schema record for a Graph RAG response: text or an error."""
    error = Error()
    response = String()
############################################################################
# Document RAG text retrieval
class DocumentRagQuery(Record):
    """Pulsar schema record for a Document RAG text retrieval query."""
    query = String()
    user = String()
    collection = String()
    # NOTE(review): presumably the maximum number of documents/chunks used
    # for retrieval -- confirm with the Document RAG service.
    doc_limit = Integer()
class DocumentRagResponse(Record):
    """Pulsar schema record for a Document RAG response: text or an error."""
    error = Error()
    response = String()