Feature/flow librarian (#361)

* Update librarian to new API

* Implementing new schema with document + processing objects
This commit is contained in:
cybermaggedon 2025-05-04 22:26:19 +01:00 committed by GitHub
parent 6bf485788a
commit ff28d26f4d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1323 additions and 428 deletions

View file

@ -1,11 +1,15 @@
import base64
from ... schema import LibrarianRequest, LibrarianResponse
from ... schema import librarian_request_queue
from ... schema import librarian_response_queue
from . requestor import ServiceRequestor
from . serialize import serialize_document_package, serialize_document_info
from . serialize import to_document_package, to_document_info, to_criteria
from . serialize import serialize_document_metadata
from . serialize import serialize_processing_metadata
from . serialize import to_document_metadata, to_processing_metadata
from . serialize import to_criteria
class LibrarianRequestor(ServiceRequestor):
def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
@ -23,20 +27,37 @@ class LibrarianRequestor(ServiceRequestor):
def to_request(self, body):
if "document" in body:
dp = to_document_package(body["document"])
# Content gets base64 decoded & encoded again. It at least makes
# sure payload is valid base64.
if "document-metadata" in body:
dm = to_document_metadata(body["document-metadata"])
else:
dp = None
dm = None
if "processing-metadata" in body:
pm = to_processing_metadata(body["processing-metadata"])
else:
pm = None
if "criteria" in body:
criteria = to_criteria(body["criteria"])
else:
criteria = None
if "content" in body:
content = base64.b64decode(body["content"].encode("utf-8"))
content = base64.b64encode(content).decode("utf-8")
else:
content = None
return LibrarianRequest(
operation = body.get("operation", None),
id = body.get("id", None),
document = dp,
document_id = body.get("document-id", None),
processing_id = body.get("processing-id", None),
document_metadata = dm,
processing_metadata = pm,
content = content,
user = body.get("user", None),
collection = body.get("collection", None),
criteria = criteria,
@ -44,15 +65,28 @@ class LibrarianRequestor(ServiceRequestor):
def from_response(self, message):
print(message)
response = {}
if message.document:
response["document"] = serialize_document_package(message.document)
if message.document_metadata:
response["document-metadata"] = serialize_document_metadata(
message.document_metadata
)
if message.info:
response["info"] = [
serialize_document_info(v)
for v in message.info
if message.content:
response["content"] = message.content.decode("utf-8")
if message.document_metadatas != None:
response["document-metadatas"] = [
serialize_document_metadata(v)
for v in message.document_metadatas
]
if message.processing_metadatas != None:
response["processing-metadatas"] = [
serialize_processing_metadata(v)
for v in message.processing_metadatas
]
return response, True

View file

@ -1,7 +1,7 @@
import base64
from ... schema import Value, Triple, DocumentPackage, DocumentInfo
from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
def to_value(x):
    """Build a ``Value`` from its dictionary form.

    The ``"v"`` entry supplies the value and the ``"e"`` entry supplies the
    ``is_uri`` flag.  Both are read with plain subscripting, so a dict that
    is missing either key raises ``KeyError``.
    """
    raw = x["v"]
    uri_flag = x["e"]
    return Value(value=raw, is_uri=uri_flag)
@ -80,88 +80,86 @@ def serialize_document_embeddings(message):
],
}
def serialize_document_package(message):
def serialize_document_metadata(message):
ret = {}
if message.id:
ret["id"] = message.id
if message.metadata:
ret["metadata"] = serialize_subgraph(message.metdata)
if message.document:
blob = base64.b64encode(
message.document.encode("utf-8")
).decode("utf-8")
ret["document"] = blob
if message.time:
ret["time"] = message.time
if message.kind:
ret["kind"] = message.kind
if message.user:
ret["user"] = message.user
if message.collection:
ret["collection"] = message.collection
return ret
def serialize_document_info(message):
ret = {}
if message.id:
ret["id"] = message.id
if message.kind:
ret["kind"] = message.kind
if message.user:
ret["user"] = message.user
if message.collection:
ret["collection"] = message.collection
if message.title:
ret["title"] = message.title
if message.comments:
ret["comments"] = message.comments
if message.time:
ret["time"] = message.time
if message.metadata:
ret["metadata"] = serialize_subgraph(message.metadata)
if message.user:
ret["user"] = message.user
if message.tags:
ret["tags"] = message.tags
return ret
def to_document_package(x):
def serialize_processing_metadata(message):
    """Convert a processing-metadata message into a plain dict.

    Only attributes holding a truthy value are emitted; unset or empty
    fields are left out of the result entirely.

    Args:
        message: object exposing ``id``, ``document_id``, ``time``,
            ``flow``, ``user``, ``collection`` and ``tags`` attributes.

    Returns:
        dict using the keys ``id``, ``document-id``, ``time``, ``flow``,
        ``user``, ``collection`` and ``tags`` (in that order) for whichever
        attributes were set.
    """
    # (output key, attribute name) pairs; tuple order fixes the key order.
    fields = (
        ("id", "id"),
        # Guarded by document_id itself.  Previously this entry was gated on
        # message.id, which dropped the document id when id was unset and
        # emitted a None document id when only id was set.
        ("document-id", "document_id"),
        ("time", "time"),
        ("flow", "flow"),
        ("user", "user"),
        ("collection", "collection"),
        ("tags", "tags"),
    )

    ret = {}
    for key, attr in fields:
        value = getattr(message, attr)
        if value:
            ret[key] = value
    return ret
def to_document_metadata(x):
return DocumentMetadata(
id = x.get("id", None),
time = x.get("time", None),
kind = x.get("kind", None),
user = x.get("user", None),
collection = x.get("collection", None),
title = x.get("title", None),
comments = x.get("comments", None),
time = x.get("time", None),
document = x.get("document", None),
metadata = to_subgraph(x["metadata"]),
user = x.get("user", None),
tags = x.get("tags", None),
)
def to_document_info(x):
def to_processing_metadata(x):
return DocumentInfo(
return ProcessingMetadata(
id = x.get("id", None),
kind = x.get("kind", None),
document_id = x.get("document-id", None),
time = x.get("time", None),
flow = x.get("flow", None),
user = x.get("user", None),
collection = x.get("collection", None),
title = x.get("title", None),
comments = x.get("comments", None),
time = x.get("time", None),
metadata = to_subgraph(x["metadata"]),
tags = x.get("tags", None),
)
def to_criteria(x):
@ -169,3 +167,4 @@ def to_criteria(x):
Critera(v["key"], v["value"], v["operator"])
for v in x
]

View file

@ -95,7 +95,6 @@ class Api:
await self.config_receiver.start()
for ep in self.endpoints:
ep.add_routes(self.app)