mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-10 07:42:38 +02:00
Object import (#497)
* Object import dispatcher * Add object import gateway test
This commit is contained in:
parent
f1d08969ec
commit
257a7951a7
3 changed files with 490 additions and 0 deletions
|
|
@ -37,6 +37,7 @@ from . triples_import import TriplesImport
|
|||
from . graph_embeddings_import import GraphEmbeddingsImport
|
||||
from . document_embeddings_import import DocumentEmbeddingsImport
|
||||
from . entity_contexts_import import EntityContextsImport
|
||||
from . objects_import import ObjectsImport
|
||||
|
||||
from . core_export import CoreExport
|
||||
from . core_import import CoreImport
|
||||
|
|
@ -82,6 +83,7 @@ import_dispatchers = {
|
|||
"graph-embeddings": GraphEmbeddingsImport,
|
||||
"document-embeddings": DocumentEmbeddingsImport,
|
||||
"entity-contexts": EntityContextsImport,
|
||||
"objects": ObjectsImport,
|
||||
}
|
||||
|
||||
class DispatcherWrapper:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,70 @@
|
|||
import asyncio
|
||||
import uuid
|
||||
import logging
|
||||
from aiohttp import WSMsgType
|
||||
|
||||
from ... schema import Metadata
|
||||
from ... schema import ExtractedObject
|
||||
from ... base import Publisher
|
||||
|
||||
from . serialize import to_subgraph
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ObjectsImport:
|
||||
|
||||
def __init__(
|
||||
self, ws, running, pulsar_client, queue
|
||||
):
|
||||
|
||||
self.ws = ws
|
||||
self.running = running
|
||||
|
||||
self.publisher = Publisher(
|
||||
pulsar_client, topic = queue, schema = ExtractedObject
|
||||
)
|
||||
|
||||
async def start(self):
|
||||
await self.publisher.start()
|
||||
|
||||
async def destroy(self):
|
||||
# Step 1: Stop accepting new messages
|
||||
self.running.stop()
|
||||
|
||||
# Step 2: Wait for publisher to drain its queue
|
||||
logger.info("Draining publisher queue...")
|
||||
await self.publisher.stop()
|
||||
|
||||
# Step 3: Close websocket only after queue is drained
|
||||
if self.ws:
|
||||
await self.ws.close()
|
||||
|
||||
async def receive(self, msg):
|
||||
|
||||
data = msg.json()
|
||||
|
||||
elt = ExtractedObject(
|
||||
metadata=Metadata(
|
||||
id=data["metadata"]["id"],
|
||||
metadata=to_subgraph(data["metadata"].get("metadata", [])),
|
||||
user=data["metadata"]["user"],
|
||||
collection=data["metadata"]["collection"],
|
||||
),
|
||||
schema_name=data["schema_name"],
|
||||
values=data["values"],
|
||||
confidence=data.get("confidence", 1.0),
|
||||
source_span=data.get("source_span", ""),
|
||||
)
|
||||
|
||||
await self.publisher.send(None, elt)
|
||||
|
||||
async def run(self):
|
||||
|
||||
while self.running.get():
|
||||
await asyncio.sleep(0.5)
|
||||
|
||||
if self.ws:
|
||||
await self.ws.close()
|
||||
|
||||
self.ws = None
|
||||
Loading…
Add table
Add a link
Reference in a new issue