mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-07 06:12:38 +02:00
Feature/pkgsplit (#83)
* Starting to spawn base package * More package hacking * Bedrock and VertexAI * Parquet split * Updated templates * Utils
This commit is contained in:
parent
3fb75c617b
commit
9b91d5eee3
262 changed files with 630 additions and 420 deletions
0
trustgraph-flow/trustgraph/storage/__init__.py
Normal file
0
trustgraph-flow/trustgraph/storage/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
63
trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/write.py
Executable file
63
trustgraph-flow/trustgraph/storage/doc_embeddings/milvus/write.py
Executable file
|
|
@ -0,0 +1,63 @@
|
|||
|
||||
"""
|
||||
Accepts document chunk/vector pairs and writes them to a Milvus store.
|
||||
"""
|
||||
|
||||
from .... schema import ChunkEmbeddings
|
||||
from .... schema import chunk_embeddings_ingest_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... direct.milvus_doc_embeddings import DocVectors
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = chunk_embeddings_ingest_queue
|
||||
default_subscriber = module
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that writes document-chunk embeddings to a Milvus store.

    Messages are read using the ChunkEmbeddings schema; each vector in a
    message is inserted into the DocVectors store together with the
    decoded chunk text.
    """

    def __init__(self, **params):
        """Set up the consumer and open the vector store.

        Optional keys read from ``params``: ``input_queue``,
        ``subscriber`` and ``store_uri``; each falls back to the
        module-level default.  All params are forwarded to the Consumer
        base class.
        """

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        store_uri = params.get("store_uri", default_store_uri)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": ChunkEmbeddings,
                "store_uri": store_uri,
            }
        )

        self.vecstore = DocVectors(store_uri)

    def handle(self, msg):
        """Insert every vector of one message alongside its chunk text.

        Messages whose chunk is missing or empty are skipped.
        """

        v = msg.value()

        # Test for None *before* decoding: decoding first would raise
        # AttributeError and make the None check unreachable.
        if v.chunk is None:
            return

        chunk = v.chunk.decode("utf-8")

        # Compare the decoded text.  The undecoded payload never equals the
        # str "", so the emptiness test must use the decoded value.
        if chunk == "":
            return

        for vec in v.vectors:
            self.vecstore.insert(vec, chunk)

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-t/--store-uri``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-t', '--store-uri',
            default=default_store_uri,
            help=f'Milvus store URI (default: {default_store_uri})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
|
|
@ -0,0 +1,104 @@
|
|||
|
||||
"""
|
||||
Accepts document chunk/vector pairs and writes them to a Qdrant store.
|
||||
"""
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PointStruct
|
||||
from qdrant_client.models import Distance, VectorParams
|
||||
import uuid
|
||||
|
||||
from .... schema import ChunkEmbeddings
|
||||
from .... schema import chunk_embeddings_ingest_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = chunk_embeddings_ingest_queue
|
||||
default_subscriber = module
|
||||
default_store_uri = 'http://localhost:6333'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that upserts document-chunk embeddings into Qdrant.

    Vectors are stored in a collection named ``doc_<dimension>``, which is
    created when it does not exist yet.
    """

    def __init__(self, **params):
        """Resolve settings from ``params`` and connect to Qdrant.

        ``input_queue``, ``subscriber`` and ``store_uri`` fall back to the
        module-level defaults when absent.
        """

        store_uri = params.get("store_uri", default_store_uri)

        settings = {
            **params,
            "input_queue": params.get("input_queue", default_input_queue),
            "subscriber": params.get("subscriber", default_subscriber),
            "input_schema": ChunkEmbeddings,
            "store_uri": store_uri,
        }

        super().__init__(**settings)

        # Remember the most recently used collection/dimension so the
        # existence check is only repeated when the dimension changes.
        self.last_collection = None
        self.last_dim = None

        self.client = QdrantClient(url=store_uri)

    def _ensure_collection(self, collection, dim):
        """Create ``collection`` with dot-product distance if it is absent."""

        if self.client.collection_exists(collection):
            return

        try:
            self.client.create_collection(
                collection_name=collection,
                vectors_config=VectorParams(
                    size=dim, distance=Distance.DOT
                ),
            )
        except Exception as e:
            print("Qdrant collection creation failed")
            raise e

    def handle(self, msg):
        """Upsert one point per vector, with the chunk text as payload."""

        v = msg.value()

        chunk = v.chunk.decode("utf-8")

        # Nothing to store for an empty chunk.
        if chunk == "":
            return

        for vec in v.vectors:

            dim = len(vec)
            collection = f"doc_{dim}"

            if dim != self.last_dim:
                self._ensure_collection(collection, dim)
                self.last_collection = collection
                self.last_dim = dim

            # Each point gets a fresh random UUID as its id.
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=vec,
                payload={
                    "doc": chunk,
                }
            )

            self.client.upsert(
                collection_name=collection,
                points=[point]
            )

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-t/--store-uri``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-t', '--store-uri',
            default=default_store_uri,
            help=f'Qdrant store URI (default: {default_store_uri})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
61
trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py
Executable file
61
trustgraph-flow/trustgraph/storage/graph_embeddings/milvus/write.py
Executable file
|
|
@ -0,0 +1,61 @@
|
|||
|
||||
"""
|
||||
Accepts entity/vector pairs and writes them to a Milvus store.
|
||||
"""
|
||||
|
||||
from .... schema import GraphEmbeddings
|
||||
from .... schema import graph_embeddings_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... direct.milvus_graph_embeddings import EntityVectors
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = graph_embeddings_store_queue
|
||||
default_subscriber = module
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that writes graph-entity embeddings to a Milvus store.

    Messages are read using the GraphEmbeddings schema; each vector in a
    message is inserted into the EntityVectors store together with the
    entity value.
    """

    def __init__(self, **params):
        """Set up the consumer and open the vector store.

        Optional keys read from ``params``: ``input_queue``,
        ``subscriber`` and ``store_uri``; each falls back to the
        module-level default.
        """

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        store_uri = params.get("store_uri", default_store_uri)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": GraphEmbeddings,
                "store_uri": store_uri,
            }
        )

        self.vecstore = EntityVectors(store_uri)

    def handle(self, msg):
        """Insert every vector of one message alongside its entity value.

        Messages whose entity value is empty or None are skipped.
        """

        v = msg.value()

        entity = v.entity.value

        # Skip None as well as "", so a missing entity is never written
        # to the store as a key.
        if entity is None or entity == "":
            return

        for vec in v.vectors:
            self.vecstore.insert(vec, entity)

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-t/--store-uri``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-t', '--store-uri',
            default=default_store_uri,
            help=f'Milvus store URI (default: {default_store_uri})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
102
trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py
Executable file
102
trustgraph-flow/trustgraph/storage/graph_embeddings/qdrant/write.py
Executable file
|
|
@ -0,0 +1,102 @@
|
|||
|
||||
"""
|
||||
Accepts entity/vector pairs and writes them to a Qdrant store.
|
||||
"""
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PointStruct
|
||||
from qdrant_client.models import Distance, VectorParams
|
||||
import uuid
|
||||
|
||||
from .... schema import GraphEmbeddings
|
||||
from .... schema import graph_embeddings_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = graph_embeddings_store_queue
|
||||
default_subscriber = module
|
||||
default_store_uri = 'http://localhost:6333'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that upserts graph-entity embeddings into Qdrant.

    Vectors are stored in a collection named ``triples_<dimension>``,
    which is created when it does not exist yet.
    """

    def __init__(self, **params):
        """Resolve settings from ``params`` and connect to Qdrant.

        ``input_queue``, ``subscriber`` and ``store_uri`` fall back to the
        module-level defaults when absent.
        """

        store_uri = params.get("store_uri", default_store_uri)

        settings = {
            **params,
            "input_queue": params.get("input_queue", default_input_queue),
            "subscriber": params.get("subscriber", default_subscriber),
            "input_schema": GraphEmbeddings,
            "store_uri": store_uri,
        }

        super().__init__(**settings)

        # Remember the most recently used collection/dimension so the
        # existence check is only repeated when the dimension changes.
        self.last_collection = None
        self.last_dim = None

        self.client = QdrantClient(url=store_uri)

    def _ensure_collection(self, collection, dim):
        """Create ``collection`` with dot-product distance if it is absent."""

        if self.client.collection_exists(collection):
            return

        try:
            self.client.create_collection(
                collection_name=collection,
                vectors_config=VectorParams(
                    size=dim, distance=Distance.DOT
                ),
            )
        except Exception as e:
            print("Qdrant collection creation failed")
            raise e

    def handle(self, msg):
        """Upsert one point per vector, with the entity value as payload."""

        v = msg.value()

        # Nothing to store when the entity value is empty or missing.
        if v.entity.value == "" or v.entity.value is None:
            return

        for vec in v.vectors:

            dim = len(vec)
            collection = f"triples_{dim}"

            if dim != self.last_dim:
                self._ensure_collection(collection, dim)
                self.last_collection = collection
                self.last_dim = dim

            # Each point gets a fresh random UUID as its id.
            point = PointStruct(
                id=str(uuid.uuid4()),
                vector=vec,
                payload={
                    "entity": v.entity.value,
                }
            )

            self.client.upsert(
                collection_name=collection,
                points=[point]
            )

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-t/--store-uri``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-t', '--store-uri',
            default=default_store_uri,
            help=f'Qdrant store URI (default: {default_store_uri})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/object_embeddings/milvus/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/object_embeddings/milvus/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
61
trustgraph-flow/trustgraph/storage/object_embeddings/milvus/write.py
Executable file
61
trustgraph-flow/trustgraph/storage/object_embeddings/milvus/write.py
Executable file
|
|
@ -0,0 +1,61 @@
|
|||
|
||||
"""
|
||||
Accepts entity/vector pairs and writes them to a Milvus store.
|
||||
"""
|
||||
|
||||
from .... schema import ObjectEmbeddings
|
||||
from .... schema import object_embeddings_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... direct.milvus_object_embeddings import ObjectVectors
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = object_embeddings_store_queue
|
||||
default_subscriber = module
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that writes object embeddings to a Milvus store.

    Messages are read using the ObjectEmbeddings schema; each vector is
    inserted into the ObjectVectors store with the message's name,
    key name and id.
    """

    def __init__(self, **params):
        """Resolve settings from ``params`` and open the vector store.

        ``input_queue``, ``subscriber`` and ``store_uri`` fall back to the
        module-level defaults when absent.
        """

        store_uri = params.get("store_uri", default_store_uri)

        settings = {
            **params,
            "input_queue": params.get("input_queue", default_input_queue),
            "subscriber": params.get("subscriber", default_subscriber),
            "input_schema": ObjectEmbeddings,
            "store_uri": store_uri,
        }

        super().__init__(**settings)

        self.vecstore = ObjectVectors(store_uri)

    def handle(self, msg):
        """Insert every vector of one message; skip messages without an id."""

        v = msg.value()

        # Guard clause: an empty or missing id means there is nothing to key on.
        if v.id == "" or v.id is None:
            return

        for vec in v.vectors:
            self.vecstore.insert(vec, v.name, v.key_name, v.id)

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-t/--store-uri``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-t', '--store-uri',
            default=default_store_uri,
            help=f'Milvus store URI (default: {default_store_uri})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
0
trustgraph-flow/trustgraph/storage/rows/__init__.py
Normal file
0
trustgraph-flow/trustgraph/storage/rows/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/rows/cassandra/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/rows/cassandra/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
127
trustgraph-flow/trustgraph/storage/rows/cassandra/write.py
Executable file
127
trustgraph-flow/trustgraph/storage/rows/cassandra/write.py
Executable file
|
|
@ -0,0 +1,127 @@
|
|||
|
||||
"""
|
||||
Row writer.  Input is a set of rows with a row schema.  Writes rows to Cassandra tables.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
from cassandra.cluster import Cluster
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
|
||||
from .... schema import Rows
|
||||
from .... schema import rows_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = rows_store_queue
|
||||
default_subscriber = module
|
||||
default_graph_host='localhost'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that writes rows into Cassandra tables.

    Messages are read using the Rows schema.  The table named by the
    message's row schema is created on first use (all columns are text)
    and every row in the message is inserted into it.
    """

    def __init__(self, **params):
        """Connect to Cassandra and select the ``trustgraph`` keyspace.

        Optional keys read from ``params``: ``input_queue``,
        ``subscriber`` and ``graph_host`` (a comma-separated host list);
        each falls back to the module-level default.
        """

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        graph_host = params.get("graph_host", default_graph_host)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": Rows,
                "graph_host": graph_host,
            }
        )

        self.cluster = Cluster(graph_host.split(","))
        self.session = self.cluster.connect()

        # Names of tables this process has already issued a CREATE for.
        self.tables = set()

        self.session.execute("""
            create keyspace if not exists trustgraph
                with replication = {
                   'class' : 'SimpleStrategy',
                   'replication_factor' : 1
                };
        """)

        self.session.execute("use trustgraph")

    def handle(self, msg):
        """Create the target table if needed, then insert the message's rows.

        On any failure the table name is dropped from the local cache so
        that table creation is attempted again for the next message, and
        the original exception is re-raised.
        """

        # Bound before the try so the error handler can always refer to it,
        # even when decoding the message itself fails.
        name = None

        try:

            v = msg.value()
            name = v.row_schema.name
            fields = v.row_schema.fields

            if name not in self.tables:

                # FIXME: SQL injection?  Table and column names are spliced
                # into the statement text; they cannot be bound as
                # parameters.

                columns = "".join(f.name + " text, " for f in fields)
                pkey = [f.name for f in fields if f.primary]

                stmt = (
                    "create table if not exists " + name + " ( " +
                    columns +
                    "PRIMARY KEY (" + ", ".join(pkey) + "));"
                )

                self.session.execute(stmt)

                self.tables.add(name)

            field_names = [f.name for f in fields]

            # FIXME: SQL injection?  Same identifier concern as above; the
            # row values themselves are passed as bound parameters.
            # The statement text is identical for every row, so build it once.
            stmt = (
                "insert into " + name + " (" + ", ".join(field_names) +
                ") values (" + ",".join(["%s"] * len(field_names)) + ")"
            )

            for row in v.rows:
                values = [row[fname] for fname in field_names]
                self.session.execute(stmt, values)

        except Exception as e:

            print("Exception:", str(e), flush=True)

            # If there's an error make sure to do table creation etc.
            # discard() rather than remove(): the name may never have been
            # cached (or even read), and a KeyError here would hide the
            # real failure.
            self.tables.discard(name)

            raise e

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-g/--graph-host``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-g', '--graph-host',
            default=default_graph_host,
            help=f'Graph host (default: {default_graph_host})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
0
trustgraph-flow/trustgraph/storage/triples/__init__.py
Normal file
0
trustgraph-flow/trustgraph/storage/triples/__init__.py
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/triples/cassandra/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/triples/cassandra/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
69
trustgraph-flow/trustgraph/storage/triples/cassandra/write.py
Executable file
69
trustgraph-flow/trustgraph/storage/triples/cassandra/write.py
Executable file
|
|
@ -0,0 +1,69 @@
|
|||
|
||||
"""
|
||||
Graph writer. Input is graph edge. Writes edges to Cassandra graph.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from .... direct.cassandra import TrustGraph
|
||||
from .... schema import Triple
|
||||
from .... schema import triples_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = triples_store_queue
|
||||
default_subscriber = module
|
||||
default_graph_host='localhost'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that writes triples to Cassandra through TrustGraph.

    Messages are read using the Triple schema; the subject, predicate and
    object values of each message are passed to ``TrustGraph.insert``.
    """

    def __init__(self, **params):
        """Set up the consumer and open the graph store.

        Optional keys read from ``params``: ``input_queue``,
        ``subscriber`` and ``graph_host``; each falls back to the
        module-level default.  ``graph_host`` may be a single host or a
        comma-separated list of hosts.
        """

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        graph_host = params.get("graph_host", default_graph_host)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": Triple,
                "graph_host": graph_host,
            }
        )

        # Split on commas so several contact points can be supplied; a
        # single host still yields a one-element list.
        self.tg = TrustGraph(graph_host.split(","))

    def handle(self, msg):
        """Insert the (s, p, o) values of one message into the graph."""

        v = msg.value()

        self.tg.insert(
            v.s.value,
            v.p.value,
            v.o.value
        )

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus ``-g/--graph-host``."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        parser.add_argument(
            '-g', '--graph-host',
            default=default_graph_host,
            help=f'Graph host (default: {default_graph_host})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
|
|
@ -0,0 +1,3 @@
|
|||
|
||||
from . write import *
|
||||
|
||||
7
trustgraph-flow/trustgraph/storage/triples/neo4j/__main__.py
Executable file
7
trustgraph-flow/trustgraph/storage/triples/neo4j/__main__.py
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from . write import run
|
||||
|
||||
if __name__ == '__main__':
|
||||
run()
|
||||
|
||||
156
trustgraph-flow/trustgraph/storage/triples/neo4j/write.py
Executable file
156
trustgraph-flow/trustgraph/storage/triples/neo4j/write.py
Executable file
|
|
@ -0,0 +1,156 @@
|
|||
|
||||
"""
|
||||
Graph writer.  Input is graph edge.  Writes edges to Neo4j graph.
|
||||
"""
|
||||
|
||||
import pulsar
|
||||
import base64
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
|
||||
from neo4j import GraphDatabase
|
||||
|
||||
from .... schema import Triple
|
||||
from .... schema import triples_store_queue
|
||||
from .... log_level import LogLevel
|
||||
from .... base import Consumer
|
||||
|
||||
module = ".".join(__name__.split(".")[1:-1])
|
||||
|
||||
default_input_queue = triples_store_queue
|
||||
default_subscriber = module
|
||||
|
||||
default_graph_host = 'bolt://neo4j:7687'
|
||||
default_username = 'neo4j'
|
||||
default_password = 'password'
|
||||
|
||||
class Processor(Consumer):
    """Consumer that writes triples to a Neo4j database.

    Messages are read using the Triple schema.  The subject becomes a
    ``Node``; the object becomes a ``Node`` when it is a URI and a
    ``Literal`` otherwise; the predicate becomes a ``Rel`` relationship
    from subject to object.
    """

    def __init__(self, **params):
        """Set up the consumer and create the Neo4j driver.

        Optional keys read from ``params``: ``input_queue``,
        ``subscriber``, ``graph_host``, ``username`` and ``password``;
        each falls back to the module-level default.
        """

        input_queue = params.get("input_queue", default_input_queue)
        subscriber = params.get("subscriber", default_subscriber)
        graph_host = params.get("graph_host", default_graph_host)
        username = params.get("username", default_username)

        # The key must be spelled "password" to match the --password
        # option; a misspelled key silently falls back to the default.
        password = params.get("password", default_password)

        super(Processor, self).__init__(
            **params | {
                "input_queue": input_queue,
                "subscriber": subscriber,
                "input_schema": Triple,
                "graph_host": graph_host,
            }
        )

        self.db = "neo4j"

        self.io = GraphDatabase.driver(graph_host, auth=(username, password))

    def _execute(self, query, **query_params):
        """Run one query against ``self.db`` and print its summary line."""

        summary = self.io.execute_query(
            query,
            database_=self.db,
            **query_params,
        ).summary

        print("Created {nodes_created} nodes in {time} ms.".format(
            nodes_created=summary.counters.nodes_created,
            time=summary.result_available_after
        ))

    def create_node(self, uri):
        """MERGE a ``Node`` identified by ``uri``."""

        print("Create node", uri)

        self._execute(
            "MERGE (n:Node {uri: $uri})",
            uri=uri,
        )

    def create_literal(self, value):
        """MERGE a ``Literal`` holding ``value``."""

        print("Create literal", value)

        self._execute(
            "MERGE (n:Literal {value: $value})",
            value=value,
        )

    def relate_node(self, src, uri, dest):
        """MERGE a ``Rel`` with ``uri`` from Node ``src`` to Node ``dest``."""

        print("Create node rel", src, uri, dest)

        self._execute(
            "MATCH (src:Node {uri: $src}) "
            "MATCH (dest:Node {uri: $dest}) "
            "MERGE (src)-[:Rel {uri: $uri}]->(dest)",
            src=src, dest=dest, uri=uri,
        )

    def relate_literal(self, src, uri, dest):
        """MERGE a ``Rel`` with ``uri`` from Node ``src`` to Literal ``dest``."""

        print("Create literal rel", src, uri, dest)

        self._execute(
            "MATCH (src:Node {uri: $src}) "
            "MATCH (dest:Literal {value: $dest}) "
            "MERGE (src)-[:Rel {uri: $uri}]->(dest)",
            src=src, dest=dest, uri=uri,
        )

    def handle(self, msg):
        """Write one triple: ensure both endpoints exist, then relate them."""

        v = msg.value()

        self.create_node(v.s.value)

        if v.o.is_uri:
            self.create_node(v.o.value)
            self.relate_node(v.s.value, v.p.value, v.o.value)
        else:
            self.create_literal(v.o.value)
            self.relate_literal(v.s.value, v.p.value, v.o.value)

    @staticmethod
    def add_args(parser):
        """Register the Consumer options plus graph host and credentials."""

        Consumer.add_args(
            parser, default_input_queue, default_subscriber,
        )

        # --graph_host is kept for existing invocations; --graph-host is
        # accepted too.  Both store into the same ``graph_host`` attribute.
        parser.add_argument(
            '-g', '--graph_host', '--graph-host',
            default=default_graph_host,
            help=f'Graph host (default: {default_graph_host})'
        )

        parser.add_argument(
            '--username',
            default=default_username,
            help=f'Neo4j username (default: {default_username})'
        )

        parser.add_argument(
            '--password',
            default=default_password,
            help=f'Neo4j password (default: {default_password})'
        )
|
||||
|
||||
def run():
    """Call Processor.start with the module identifier and this file's docstring."""
    Processor.start(module, __doc__)
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue