mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-29 02:23:44 +02:00
Feature/flow librarian (#361)
* Update librarian to new API
* Implementing new schema with document + processing objects
This commit is contained in:
parent
6bf485788a
commit
ff28d26f4d
21 changed files with 1323 additions and 428 deletions
|
|
@ -1,11 +1,15 @@
|
|||
|
||||
import base64
|
||||
|
||||
from ... schema import LibrarianRequest, LibrarianResponse
|
||||
from ... schema import librarian_request_queue
|
||||
from ... schema import librarian_response_queue
|
||||
|
||||
from . requestor import ServiceRequestor
|
||||
from . serialize import serialize_document_package, serialize_document_info
|
||||
from . serialize import to_document_package, to_document_info, to_criteria
|
||||
from . serialize import serialize_document_metadata
|
||||
from . serialize import serialize_processing_metadata
|
||||
from . serialize import to_document_metadata, to_processing_metadata
|
||||
from . serialize import to_criteria
|
||||
|
||||
class LibrarianRequestor(ServiceRequestor):
|
||||
def __init__(self, pulsar_client, consumer, subscriber, timeout=120):
|
||||
|
|
@ -23,20 +27,37 @@ class LibrarianRequestor(ServiceRequestor):
|
|||
|
||||
def to_request(self, body):
|
||||
|
||||
if "document" in body:
|
||||
dp = to_document_package(body["document"])
|
||||
# Content gets base64 decoded & encoded again. It at least makes
|
||||
# sure payload is valid base64.
|
||||
|
||||
if "document-metadata" in body:
|
||||
dm = to_document_metadata(body["document-metadata"])
|
||||
else:
|
||||
dp = None
|
||||
dm = None
|
||||
|
||||
if "processing-metadata" in body:
|
||||
pm = to_processing_metadata(body["processing-metadata"])
|
||||
else:
|
||||
pm = None
|
||||
|
||||
if "criteria" in body:
|
||||
criteria = to_criteria(body["criteria"])
|
||||
else:
|
||||
criteria = None
|
||||
|
||||
if "content" in body:
|
||||
content = base64.b64decode(body["content"].encode("utf-8"))
|
||||
content = base64.b64encode(content).decode("utf-8")
|
||||
else:
|
||||
content = None
|
||||
|
||||
return LibrarianRequest(
|
||||
operation = body.get("operation", None),
|
||||
id = body.get("id", None),
|
||||
document = dp,
|
||||
document_id = body.get("document-id", None),
|
||||
processing_id = body.get("processing-id", None),
|
||||
document_metadata = dm,
|
||||
processing_metadata = pm,
|
||||
content = content,
|
||||
user = body.get("user", None),
|
||||
collection = body.get("collection", None),
|
||||
criteria = criteria,
|
||||
|
|
@ -44,15 +65,28 @@ class LibrarianRequestor(ServiceRequestor):
|
|||
|
||||
def from_response(self, message):
    """Convert a librarian response message into a plain dict.

    Fields are copied only when present on ``message``:
    ``document_metadata`` and ``content`` when truthy, and the
    ``document_metadatas`` / ``processing_metadatas`` lists whenever they
    are not ``None`` (so an empty list is still emitted as ``[]``).

    Returns:
        A ``(response, True)`` tuple; the second element is always
        ``True``.
    """

    # Debug output: writes the raw message to stdout on every call.
    print(message)

    response = {}

    if message.document_metadata:
        response["document-metadata"] = serialize_document_metadata(
            message.document_metadata
        )

    if message.content:
        # content is decoded from UTF-8 bytes into a str for the response
        response["content"] = message.content.decode("utf-8")

    # Identity comparison: only a missing list (None) is skipped.
    if message.document_metadatas is not None:
        response["document-metadatas"] = [
            serialize_document_metadata(v)
            for v in message.document_metadatas
        ]

    if message.processing_metadatas is not None:
        response["processing-metadatas"] = [
            serialize_processing_metadata(v)
            for v in message.processing_metadatas
        ]

    return response, True
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
import base64
|
||||
|
||||
from ... schema import Value, Triple, DocumentPackage, DocumentInfo
|
||||
from ... schema import Value, Triple, DocumentMetadata, ProcessingMetadata
|
||||
|
||||
def to_value(x):
    """Build a ``Value`` from a dict with ``"v"`` and ``"e"`` keys.

    ``x["v"]`` becomes the ``value`` field and ``x["e"]`` becomes the
    ``is_uri`` field. A missing key raises ``KeyError``.
    """
    raw = x["v"]
    uri_flag = x["e"]
    return Value(value=raw, is_uri=uri_flag)
|
||||
|
|
@ -80,88 +80,86 @@ def serialize_document_embeddings(message):
|
|||
],
|
||||
}
|
||||
|
||||
def serialize_document_package(message):
|
||||
def serialize_document_metadata(message):
|
||||
|
||||
ret = {}
|
||||
|
||||
if message.id:
|
||||
ret["id"] = message.id
|
||||
|
||||
if message.metadata:
|
||||
ret["metadata"] = serialize_subgraph(message.metdata)
|
||||
|
||||
if message.document:
|
||||
blob = base64.b64encode(
|
||||
message.document.encode("utf-8")
|
||||
).decode("utf-8")
|
||||
ret["document"] = blob
|
||||
if message.time:
|
||||
ret["time"] = message.time
|
||||
|
||||
if message.kind:
|
||||
ret["kind"] = message.kind
|
||||
|
||||
if message.user:
|
||||
ret["user"] = message.user
|
||||
|
||||
if message.collection:
|
||||
ret["collection"] = message.collection
|
||||
|
||||
return ret
|
||||
|
||||
def serialize_document_info(message):
|
||||
|
||||
ret = {}
|
||||
|
||||
if message.id:
|
||||
ret["id"] = message.id
|
||||
|
||||
if message.kind:
|
||||
ret["kind"] = message.kind
|
||||
|
||||
if message.user:
|
||||
ret["user"] = message.user
|
||||
|
||||
if message.collection:
|
||||
ret["collection"] = message.collection
|
||||
|
||||
if message.title:
|
||||
ret["title"] = message.title
|
||||
|
||||
if message.comments:
|
||||
ret["comments"] = message.comments
|
||||
|
||||
if message.time:
|
||||
ret["time"] = message.time
|
||||
|
||||
if message.metadata:
|
||||
ret["metadata"] = serialize_subgraph(message.metadata)
|
||||
|
||||
if message.user:
|
||||
ret["user"] = message.user
|
||||
|
||||
if message.tags:
|
||||
ret["tags"] = message.tags
|
||||
|
||||
return ret
|
||||
|
||||
def to_document_package(x):
|
||||
def serialize_processing_metadata(message):
    """Serialize a processing-metadata record into a plain dict.

    Every attribute holding a truthy value is copied to the result under
    its serialized key; falsy attributes (``None``, ``""``, ``0``, empty
    list) are omitted.

    Args:
        message: object exposing ``id``, ``document_id``, ``time``,
            ``flow``, ``user``, ``collection`` and ``tags`` attributes.

    Returns:
        dict containing any of the keys ``id``, ``document-id``,
        ``time``, ``flow``, ``user``, ``collection`` and ``tags``.
    """

    # (attribute on the message, key in the serialized dict).
    # "document-id" is gated on document_id itself, not on id, so a
    # record with an empty id still reports which document it refers to.
    fields = (
        ("id", "id"),
        ("document_id", "document-id"),
        ("time", "time"),
        ("flow", "flow"),
        ("user", "user"),
        ("collection", "collection"),
        ("tags", "tags"),
    )

    ret = {}

    for attr, key in fields:
        value = getattr(message, attr)
        if value:
            ret[key] = value

    return ret
|
||||
|
||||
def to_document_metadata(x):
|
||||
|
||||
return DocumentMetadata(
|
||||
id = x.get("id", None),
|
||||
time = x.get("time", None),
|
||||
kind = x.get("kind", None),
|
||||
user = x.get("user", None),
|
||||
collection = x.get("collection", None),
|
||||
title = x.get("title", None),
|
||||
comments = x.get("comments", None),
|
||||
time = x.get("time", None),
|
||||
document = x.get("document", None),
|
||||
metadata = to_subgraph(x["metadata"]),
|
||||
user = x.get("user", None),
|
||||
tags = x.get("tags", None),
|
||||
)
|
||||
|
||||
def to_document_info(x):
|
||||
def to_processing_metadata(x):
|
||||
|
||||
return DocumentInfo(
|
||||
return ProcessingMetadata(
|
||||
id = x.get("id", None),
|
||||
kind = x.get("kind", None),
|
||||
document_id = x.get("document-id", None),
|
||||
time = x.get("time", None),
|
||||
flow = x.get("flow", None),
|
||||
user = x.get("user", None),
|
||||
collection = x.get("collection", None),
|
||||
title = x.get("title", None),
|
||||
comments = x.get("comments", None),
|
||||
time = x.get("time", None),
|
||||
metadata = to_subgraph(x["metadata"]),
|
||||
tags = x.get("tags", None),
|
||||
)
|
||||
|
||||
def to_criteria(x):
|
||||
|
|
@ -169,3 +167,4 @@ def to_criteria(x):
|
|||
Critera(v["key"], v["value"], v["operator"])
|
||||
for v in x
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -95,7 +95,6 @@ class Api:
|
|||
|
||||
await self.config_receiver.start()
|
||||
|
||||
|
||||
for ep in self.endpoints:
|
||||
ep.add_routes(self.app)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue