trustgraph/trustgraph-base/trustgraph/schema/services/library.py
Cyber MacGeddon db05427d0e IAM tech spec: Auth and access management current state and proposed
changes.

Support for separate workspaces

Addition of workspace CLI support for test purposes
2026-04-21 10:50:52 +01:00

227 lines
6 KiB
Python

from dataclasses import dataclass, field
from ..core.primitives import Triple, Error
from ..core.topic import queue
from ..core.metadata import Metadata
# Note: Document imports will be updated after knowledge schemas are converted
# add-document
# -> (document_id, document_metadata, content)
# <- ()
# <- (error)
# remove-document
# -> (document_id)
# <- ()
# <- (error)
# update-document
# -> (document_id, document_metadata)
# <- ()
# <- (error)
# get-document-metadata
# -> (document_id)
# <- (document_metadata)
# <- (error)
# get-document-content [DEPRECATED — use stream-document instead]
# -> (document_id)
# <- (content)
# <- (error)
# NOTE: Returns the entire document in a single message, so it fails for
# documents exceeding the broker's max message size. Use stream-document
# instead, which returns content in chunks.
# add-processing
# -> (processing_id, processing_metadata)
# <- ()
# <- (error)
# remove-processing
# -> (processing_id)
# <- ()
# <- (error)
# list-documents
# -> (workspace, collection?)
# <- (document_metadata[])
# <- (error)
# list-processing
# -> (workspace, collection?)
# <- (processing_metadata[])
# <- (error)
# begin-upload
# -> (document_metadata, total_size, chunk_size)
# <- (upload_id, chunk_size, total_chunks)
# <- (error)
# upload-chunk
# -> (upload_id, chunk_index, content)
# <- (upload_id, chunk_index, chunks_received, total_chunks, bytes_received, total_bytes)
# <- (error)
# complete-upload
# -> (upload_id)
# <- (document_id, object_id)
# <- (error)
# abort-upload
# -> (upload_id)
# <- ()
# <- (error)
# get-upload-status
# -> (upload_id)
# <- (upload_id, state, chunks_received, missing_chunks, total_chunks, bytes_received, total_bytes)
# <- (error)
# list-uploads
# -> (workspace)
# <- (uploads[])
# <- (error)
@dataclass
class DocumentMetadata:
    """Metadata record describing one document held in the library.

    Carried by add-document / update-document / begin-upload requests and
    returned by get-document-metadata and list-documents.  Every field has
    a default, so an empty instance can be built with no arguments; field
    order is part of the positional constructor and must not change.
    """

    # Basic descriptive fields.
    id: str = ""
    time: int = 0
    kind: str = ""
    title: str = ""
    comments: str = ""

    # Additional metadata expressed as triples; a fresh list per instance.
    metadata: list[Triple] = field(default_factory=list)

    workspace: str = ""
    tags: list[str] = field(default_factory=list)

    # Child document support: empty for top-level documents, set for
    # children.
    parent_id: str = ""

    # Document type vocabulary:
    #   "source"    - original uploaded document
    #   "page"      - page extracted from source (e.g., PDF page)
    #   "chunk"     - text chunk derived from page or source
    #   "extracted" - legacy value, kept for backwards compatibility
    document_type: str = "source"
@dataclass
class ProcessingMetadata:
    """Metadata record for one processing entry in the library.

    Sent with add-processing and returned (as a list) by list-processing.
    All fields default to empty values; field order is part of the
    positional constructor and must not change.
    """

    # Identifiers: the processing entry itself and the document involved.
    id: str = ""
    document_id: str = ""

    time: int = 0
    flow: str = ""

    # Scoping.
    workspace: str = ""
    collection: str = ""

    # A fresh list per instance (never shared between instances).
    tags: list[str] = field(default_factory=list)
@dataclass
class Criteria:
    """One key / value / operator criterion.

    Instances populate ``LibrarianRequest.criteria``.  The positional
    order is (key, value, operator).
    """

    key: str = ""
    value: str = ""
    operator: str = ""
@dataclass
class UploadProgress:
    """Chunk and byte counters reporting progress of a chunked upload."""

    upload_id: str = ""

    # Chunk counters.
    chunks_received: int = 0
    total_chunks: int = 0

    # Byte counters.
    bytes_received: int = 0
    total_bytes: int = 0
@dataclass
class UploadSession:
    """Description of an upload that is still in progress.

    Returned (as a list) in ``LibrarianResponse.upload_sessions`` for the
    list-uploads operation.
    """

    upload_id: str = ""
    document_id: str = ""

    # JSON-encoded DocumentMetadata
    document_metadata_json: str = ""

    # Sizes and chunk accounting.
    total_size: int = 0
    chunk_size: int = 0
    total_chunks: int = 0
    chunks_received: int = 0

    created_at: str = ""
@dataclass
class LibrarianRequest:
    """Request message for the librarian service.

    ``operation`` selects the action; the remaining fields are only
    meaningful for the operations named in the comment above each field.
    Every field has a default, so callers set just what their operation
    needs.  Field order is part of the positional constructor and must
    not change.
    """

    # One of: add-document, remove-document, update-document,
    # get-document-metadata, get-document-content, add-processing,
    # remove-processing, list-documents, list-processing, begin-upload,
    # upload-chunk, complete-upload, abort-upload, get-upload-status,
    # list-uploads.  stream-document is also referenced by chunk_index
    # below.
    operation: str = ""

    # Used by: add-document, remove-document, update-document,
    # get-document-metadata, get-document-content
    document_id: str = ""

    # Used by: add-processing, remove-processing
    processing_id: str = ""

    # Used by: add-document, update-document, begin-upload
    document_metadata: DocumentMetadata | None = None

    # Used by: add-processing
    processing_metadata: ProcessingMetadata | None = None

    # Used by: add-document, upload-chunk
    content: bytes = b""

    # Workspace scopes every library operation.
    workspace: str = ""

    # Optional for: list-documents, list-processing
    collection: str = ""

    # Key / value / operator criteria.
    # NOTE(review): the original comment here was empty, so which
    # operations read this field is not documented - confirm.
    criteria: list[Criteria] = field(default_factory=list)

    # Used by: begin-upload
    total_size: int = 0
    chunk_size: int = 0

    # Used by: upload-chunk, complete-upload, abort-upload,
    # get-upload-status
    upload_id: str = ""

    # Used by: upload-chunk, stream-document
    chunk_index: int = 0

    # list-documents: whether to include child documents (default False)
    include_children: bool = False
@dataclass
class LibrarianResponse:
    """Response message from the librarian service.

    One message type serves every operation: the fields relevant to the
    operation that was requested are populated and the rest keep their
    defaults.  Field order is part of the positional constructor and must
    not change.
    """

    # Populated for the error form of a reply; None otherwise.
    error: Error | None = None

    # Document / processing results.
    document_metadata: DocumentMetadata | None = None
    content: bytes = b""
    document_metadatas: list[DocumentMetadata] = field(default_factory=list)
    processing_metadatas: list[ProcessingMetadata] = field(
        default_factory=list
    )

    # begin-upload response
    upload_id: str = ""
    chunk_size: int = 0
    total_chunks: int = 0

    # upload-chunk response
    chunk_index: int = 0
    chunks_received: int = 0
    bytes_received: int = 0
    total_bytes: int = 0

    # complete-upload response
    document_id: str = ""
    object_id: str = ""

    # get-upload-status response
    # upload_state is one of "in-progress", "completed", "expired".
    upload_state: str = ""
    received_chunks: list[int] = field(default_factory=list)
    missing_chunks: list[int] = field(default_factory=list)

    # list-uploads response
    upload_sessions: list[UploadSession] = field(default_factory=list)

    # Protocol flag: True when this is the final response for a request.
    # Defaults to True because most operations are a single
    # request/response pair; only stream-document sets False, on its
    # intermediate chunks.
    is_final: bool = True
# Request and response queue definitions for the librarian service, built
# with the `queue` helper imported from ..core.topic.
#
# FIXME: Is this right? Using persistence on librarian so that
# message chunking works
# NOTE(review): no persistence setting is visible in these calls -
# presumably `queue` derives it from `cls`; confirm against ..core.topic.
librarian_request_queue = queue('librarian', cls='request')
librarian_response_queue = queue('librarian', cls='response')