mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 08:56:21 +02:00
knowledge service (#367)
* Write knowledge core elements to Cassandra * Store service works, building management service * kg-manager
This commit is contained in:
parent
d0da122bed
commit
807c19fd22
24 changed files with 1196 additions and 243 deletions
0
trustgraph-flow/trustgraph/tables/__init__.py
Normal file
0
trustgraph-flow/trustgraph/tables/__init__.py
Normal file
504
trustgraph-flow/trustgraph/tables/knowledge.py
Normal file
504
trustgraph-flow/trustgraph/tables/knowledge.py
Normal file
|
|
@ -0,0 +1,504 @@
|
|||
|
||||
from .. schema import KnowledgeResponse, Triple, Triples, EntityEmbeddings
|
||||
from .. schema import Metadata, Value, GraphEmbeddings
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
from ssl import SSLContext, PROTOCOL_TLSv1_2
|
||||
|
||||
import uuid
|
||||
import time
|
||||
import asyncio
|
||||
|
||||
class KnowledgeTableStore:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
cassandra_host, cassandra_user, cassandra_password, keyspace,
|
||||
):
|
||||
|
||||
self.keyspace = keyspace
|
||||
|
||||
print("Connecting to Cassandra...", flush=True)
|
||||
|
||||
if cassandra_user and cassandra_password:
|
||||
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
|
||||
auth_provider = PlainTextAuthProvider(
|
||||
username=cassandra_user, password=cassandra_password
|
||||
)
|
||||
self.cluster = Cluster(
|
||||
cassandra_host,
|
||||
auth_provider=auth_provider,
|
||||
ssl_context=ssl_context
|
||||
)
|
||||
else:
|
||||
self.cluster = Cluster(cassandra_host)
|
||||
|
||||
self.cassandra = self.cluster.connect()
|
||||
|
||||
print("Connected.", flush=True)
|
||||
|
||||
self.ensure_cassandra_schema()
|
||||
|
||||
self.prepare_statements()
|
||||
|
||||
def ensure_cassandra_schema(self):
|
||||
|
||||
print("Ensure Cassandra schema...", flush=True)
|
||||
|
||||
print("Keyspace...", flush=True)
|
||||
|
||||
# FIXME: Replication factor should be configurable
|
||||
self.cassandra.execute(f"""
|
||||
create keyspace if not exists {self.keyspace}
|
||||
with replication = {{
|
||||
'class' : 'SimpleStrategy',
|
||||
'replication_factor' : 1
|
||||
}};
|
||||
""");
|
||||
|
||||
self.cassandra.set_keyspace(self.keyspace)
|
||||
|
||||
print("triples table...", flush=True)
|
||||
|
||||
self.cassandra.execute("""
|
||||
CREATE TABLE IF NOT EXISTS triples (
|
||||
user text,
|
||||
document_id text,
|
||||
id uuid,
|
||||
time timestamp,
|
||||
metadata list<tuple<
|
||||
text, boolean, text, boolean, text, boolean
|
||||
>>,
|
||||
triples list<tuple<
|
||||
text, boolean, text, boolean, text, boolean
|
||||
>>,
|
||||
PRIMARY KEY ((user, document_id), id)
|
||||
);
|
||||
""");
|
||||
|
||||
print("graph_embeddings table...", flush=True)
|
||||
|
||||
self.cassandra.execute("""
|
||||
create table if not exists graph_embeddings (
|
||||
user text,
|
||||
document_id text,
|
||||
id uuid,
|
||||
time timestamp,
|
||||
metadata list<tuple<
|
||||
text, boolean, text, boolean, text, boolean
|
||||
>>,
|
||||
entity_embeddings list<
|
||||
tuple<
|
||||
tuple<text, boolean>,
|
||||
list<list<double>>
|
||||
>
|
||||
>,
|
||||
PRIMARY KEY ((user, document_id), id)
|
||||
);
|
||||
""");
|
||||
|
||||
self.cassandra.execute("""
|
||||
CREATE INDEX IF NOT EXISTS graph_embeddings_user ON
|
||||
graph_embeddings ( user );
|
||||
""");
|
||||
|
||||
print("document_embeddings table...", flush=True)
|
||||
|
||||
self.cassandra.execute("""
|
||||
create table if not exists document_embeddings (
|
||||
user text,
|
||||
document_id text,
|
||||
id uuid,
|
||||
time timestamp,
|
||||
metadata list<tuple<
|
||||
text, boolean, text, boolean, text, boolean
|
||||
>>,
|
||||
chunks list<
|
||||
tuple<
|
||||
blob,
|
||||
list<list<double>>
|
||||
>
|
||||
>,
|
||||
PRIMARY KEY ((user, document_id), id)
|
||||
);
|
||||
""");
|
||||
|
||||
self.cassandra.execute("""
|
||||
CREATE INDEX IF NOT EXISTS document_embeddings_user ON
|
||||
document_embeddings ( user );
|
||||
""");
|
||||
|
||||
print("Cassandra schema OK.", flush=True)
|
||||
|
||||
def prepare_statements(self):
|
||||
|
||||
self.insert_triples_stmt = self.cassandra.prepare("""
|
||||
INSERT INTO triples
|
||||
(
|
||||
id, user, document_id,
|
||||
time, metadata, triples
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""")
|
||||
|
||||
self.insert_graph_embeddings_stmt = self.cassandra.prepare("""
|
||||
INSERT INTO graph_embeddings
|
||||
(
|
||||
id, user, document_id, time, metadata, entity_embeddings
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""")
|
||||
|
||||
self.insert_document_embeddings_stmt = self.cassandra.prepare("""
|
||||
INSERT INTO document_embeddings
|
||||
(
|
||||
id, user, document_id, time, metadata, chunks
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?, ?)
|
||||
""")
|
||||
|
||||
self.list_cores_stmt = self.cassandra.prepare("""
|
||||
SELECT DISTINCT user, document_id FROM graph_embeddings
|
||||
WHERE user = ?
|
||||
""")
|
||||
|
||||
self.get_triples_stmt = self.cassandra.prepare("""
|
||||
SELECT id, time, metadata, triples
|
||||
FROM triples
|
||||
WHERE user = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
self.get_graph_embeddings_stmt = self.cassandra.prepare("""
|
||||
SELECT id, time, metadata, entity_embeddings
|
||||
FROM graph_embeddings
|
||||
WHERE user = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
self.get_document_embeddings_stmt = self.cassandra.prepare("""
|
||||
SELECT id, time, metadata, chunks
|
||||
FROM document_embeddings
|
||||
WHERE user = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
self.delete_triples_stmt = self.cassandra.prepare("""
|
||||
DELETE FROM triples
|
||||
WHERE user = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
self.delete_graph_embeddings_stmt = self.cassandra.prepare("""
|
||||
DELETE FROM graph_embeddings
|
||||
WHERE user = ? AND document_id = ?
|
||||
""")
|
||||
|
||||
async def add_triples(self, m):
|
||||
|
||||
when = int(time.time() * 1000)
|
||||
|
||||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
triples = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
)
|
||||
for v in m.triples
|
||||
]
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.insert_triples_stmt,
|
||||
(
|
||||
uuid.uuid4(), m.metadata.user,
|
||||
m.metadata.id, when,
|
||||
metadata, triples,
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def add_graph_embeddings(self, m):
|
||||
|
||||
when = int(time.time() * 1000)
|
||||
|
||||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
entities = [
|
||||
(
|
||||
(v.entity.value, v.entity.is_uri),
|
||||
v.vectors
|
||||
)
|
||||
for v in m.entities
|
||||
]
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.insert_graph_embeddings_stmt,
|
||||
(
|
||||
uuid.uuid4(), m.metadata.user,
|
||||
m.metadata.id, when,
|
||||
metadata, entities,
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def add_document_embeddings(self, m):
|
||||
|
||||
when = int(time.time() * 1000)
|
||||
|
||||
if m.metadata.metadata:
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
)
|
||||
for v in m.metadata.metadata
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
chunks = [
|
||||
(
|
||||
v.chunk,
|
||||
v.vectors,
|
||||
)
|
||||
for v in m.chunks
|
||||
]
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.insert_document_embeddings_stmt,
|
||||
(
|
||||
uuid.uuid4(), m.metadata.user,
|
||||
m.metadata.id, when,
|
||||
metadata, chunks,
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def list_kg_cores(self, user):
|
||||
|
||||
print("List kg cores...")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.list_cores_stmt,
|
||||
(user,)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
lst = [
|
||||
row[1]
|
||||
for row in resp
|
||||
]
|
||||
|
||||
print("Done")
|
||||
|
||||
return lst
|
||||
|
||||
async def delete_kg_core(self, user, document_id):
|
||||
|
||||
print("Delete kg cores...")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.delete_triples_stmt,
|
||||
(user, document_id)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.delete_graph_embeddings_stmt,
|
||||
(user, document_id)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
async def get_triples(self, user, document_id, receiver):
|
||||
|
||||
print("Get triples...")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.get_triples_stmt,
|
||||
(user, document_id)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
for row in resp:
|
||||
|
||||
if row[2]:
|
||||
metadata = [
|
||||
Triple(
|
||||
s = Value(value = elt[0], is_uri = elt[1]),
|
||||
p = Value(value = elt[2], is_uri = elt[3]),
|
||||
o = Value(value = elt[4], is_uri = elt[5]),
|
||||
)
|
||||
for elt in row[2]
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
triples = [
|
||||
Triple(
|
||||
s = Value(value = elt[0], is_uri = elt[1]),
|
||||
p = Value(value = elt[2], is_uri = elt[3]),
|
||||
o = Value(value = elt[4], is_uri = elt[5]),
|
||||
)
|
||||
for elt in row[3]
|
||||
]
|
||||
|
||||
await receiver(
|
||||
Triples(
|
||||
metadata = Metadata(
|
||||
id = document_id,
|
||||
user = user,
|
||||
collection = "default", # FIXME: What to put here?
|
||||
metadata = metadata,
|
||||
),
|
||||
triples = triples
|
||||
)
|
||||
)
|
||||
|
||||
print("Done")
|
||||
|
||||
async def get_graph_embeddings(self, user, document_id, receiver):
|
||||
|
||||
print("Get GE...")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.get_graph_embeddings_stmt,
|
||||
(user, document_id)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
for row in resp:
|
||||
|
||||
if row[2]:
|
||||
metadata = [
|
||||
Triple(
|
||||
s = Value(value = elt[0], is_uri = elt[1]),
|
||||
p = Value(value = elt[2], is_uri = elt[3]),
|
||||
o = Value(value = elt[4], is_uri = elt[5]),
|
||||
)
|
||||
for elt in row[2]
|
||||
]
|
||||
else:
|
||||
metadata = []
|
||||
|
||||
entities = [
|
||||
EntityEmbeddings(
|
||||
entity = Value(value = ent[0][0], is_uri = ent[0][1]),
|
||||
vectors = ent[1]
|
||||
)
|
||||
for ent in row[3]
|
||||
]
|
||||
|
||||
await receiver(
|
||||
GraphEmbeddings(
|
||||
metadata = Metadata(
|
||||
id = document_id,
|
||||
user = user,
|
||||
collection = "default", # FIXME: What to put here?
|
||||
metadata = metadata,
|
||||
),
|
||||
entities = entities
|
||||
)
|
||||
)
|
||||
|
||||
print("Done")
|
||||
|
||||
525
trustgraph-flow/trustgraph/tables/library.py
Normal file
525
trustgraph-flow/trustgraph/tables/library.py
Normal file
|
|
@ -0,0 +1,525 @@
|
|||
|
||||
from .. schema import LibrarianRequest, LibrarianResponse
|
||||
from .. schema import DocumentMetadata, ProcessingMetadata
|
||||
from .. schema import Error, Triple, Value
|
||||
from .. knowledge import hash
|
||||
from .. exceptions import RequestError
|
||||
|
||||
from cassandra.cluster import Cluster
|
||||
from cassandra.auth import PlainTextAuthProvider
|
||||
from cassandra.query import BatchStatement
|
||||
from ssl import SSLContext, PROTOCOL_TLSv1_2
|
||||
|
||||
import uuid
|
||||
import time
|
||||
import asyncio
|
||||
|
||||
class LibraryTableStore:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
cassandra_host, cassandra_user, cassandra_password, keyspace,
|
||||
):
|
||||
|
||||
self.keyspace = keyspace
|
||||
|
||||
print("Connecting to Cassandra...", flush=True)
|
||||
|
||||
if cassandra_user and cassandra_password:
|
||||
ssl_context = SSLContext(PROTOCOL_TLSv1_2)
|
||||
auth_provider = PlainTextAuthProvider(
|
||||
username=cassandra_user, password=cassandra_password
|
||||
)
|
||||
self.cluster = Cluster(
|
||||
cassandra_host,
|
||||
auth_provider=auth_provider,
|
||||
ssl_context=ssl_context
|
||||
)
|
||||
else:
|
||||
self.cluster = Cluster(cassandra_host)
|
||||
|
||||
self.cassandra = self.cluster.connect()
|
||||
|
||||
print("Connected.", flush=True)
|
||||
|
||||
self.ensure_cassandra_schema()
|
||||
|
||||
self.prepare_statements()
|
||||
|
||||
def ensure_cassandra_schema(self):
|
||||
|
||||
print("Ensure Cassandra schema...", flush=True)
|
||||
|
||||
print("Keyspace...", flush=True)
|
||||
|
||||
# FIXME: Replication factor should be configurable
|
||||
self.cassandra.execute(f"""
|
||||
create keyspace if not exists {self.keyspace}
|
||||
with replication = {{
|
||||
'class' : 'SimpleStrategy',
|
||||
'replication_factor' : 1
|
||||
}};
|
||||
""");
|
||||
|
||||
self.cassandra.set_keyspace(self.keyspace)
|
||||
|
||||
print("document table...", flush=True)
|
||||
|
||||
self.cassandra.execute("""
|
||||
CREATE TABLE IF NOT EXISTS document (
|
||||
id text,
|
||||
user text,
|
||||
time timestamp,
|
||||
kind text,
|
||||
title text,
|
||||
comments text,
|
||||
metadata list<tuple<
|
||||
text, boolean, text, boolean, text, boolean
|
||||
>>,
|
||||
tags list<text>,
|
||||
object_id uuid,
|
||||
PRIMARY KEY (user, id)
|
||||
);
|
||||
""");
|
||||
|
||||
print("object index...", flush=True)
|
||||
|
||||
self.cassandra.execute("""
|
||||
CREATE INDEX IF NOT EXISTS document_object
|
||||
ON document (object_id)
|
||||
""");
|
||||
|
||||
print("processing table...", flush=True)
|
||||
|
||||
self.cassandra.execute("""
|
||||
CREATE TABLE IF NOT EXISTS processing (
|
||||
id text,
|
||||
document_id text,
|
||||
time timestamp,
|
||||
flow text,
|
||||
user text,
|
||||
collection text,
|
||||
tags list<text>,
|
||||
PRIMARY KEY (user, id)
|
||||
);
|
||||
""");
|
||||
|
||||
print("Cassandra schema OK.", flush=True)
|
||||
|
||||
def prepare_statements(self):
|
||||
|
||||
self.insert_document_stmt = self.cassandra.prepare("""
|
||||
INSERT INTO document
|
||||
(
|
||||
id, user, time,
|
||||
kind, title, comments,
|
||||
metadata, tags, object_id
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""")
|
||||
|
||||
self.update_document_stmt = self.cassandra.prepare("""
|
||||
UPDATE document
|
||||
SET time = ?, title = ?, comments = ?,
|
||||
metadata = ?, tags = ?
|
||||
WHERE user = ? AND id = ?
|
||||
""")
|
||||
|
||||
self.get_document_stmt = self.cassandra.prepare("""
|
||||
SELECT time, kind, title, comments, metadata, tags, object_id
|
||||
FROM document
|
||||
WHERE user = ? AND id = ?
|
||||
""")
|
||||
|
||||
self.delete_document_stmt = self.cassandra.prepare("""
|
||||
DELETE FROM document
|
||||
WHERE user = ? AND id = ?
|
||||
""")
|
||||
|
||||
self.test_document_exists_stmt = self.cassandra.prepare("""
|
||||
SELECT id
|
||||
FROM document
|
||||
WHERE user = ? AND id = ?
|
||||
LIMIT 1
|
||||
""")
|
||||
|
||||
self.list_document_stmt = self.cassandra.prepare("""
|
||||
SELECT
|
||||
id, time, kind, title, comments, metadata, tags, object_id
|
||||
FROM document
|
||||
WHERE user = ?
|
||||
""")
|
||||
|
||||
self.list_document_by_tag_stmt = self.cassandra.prepare("""
|
||||
SELECT
|
||||
id, time, kind, title, comments, metadata, tags, object_id
|
||||
FROM document
|
||||
WHERE user = ? AND tags CONTAINS ?
|
||||
ALLOW FILTERING
|
||||
""")
|
||||
|
||||
self.insert_processing_stmt = self.cassandra.prepare("""
|
||||
INSERT INTO processing
|
||||
(
|
||||
id, document_id, time,
|
||||
flow, user, collection,
|
||||
tags
|
||||
)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
""")
|
||||
|
||||
self.delete_processing_stmt = self.cassandra.prepare("""
|
||||
DELETE FROM processing
|
||||
WHERE user = ? AND id = ?
|
||||
""")
|
||||
|
||||
self.test_processing_exists_stmt = self.cassandra.prepare("""
|
||||
SELECT id
|
||||
FROM processing
|
||||
WHERE user = ? AND id = ?
|
||||
LIMIT 1
|
||||
""")
|
||||
|
||||
self.list_processing_stmt = self.cassandra.prepare("""
|
||||
SELECT
|
||||
id, document_id, time, flow, collection, tags
|
||||
FROM processing
|
||||
WHERE user = ?
|
||||
""")
|
||||
|
||||
async def document_exists(self, user, id):
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.test_document_exists_stmt,
|
||||
( user, id )
|
||||
)
|
||||
|
||||
# If a row exists, document exists. It's a cursor, can't just
|
||||
# count the length
|
||||
|
||||
for row in resp:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def add_document(self, document, object_id):
|
||||
|
||||
print("Adding document", document.id, object_id)
|
||||
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
)
|
||||
for v in document.metadata
|
||||
]
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.insert_document_stmt,
|
||||
(
|
||||
document.id, document.user, int(document.time * 1000),
|
||||
document.kind, document.title, document.comments,
|
||||
metadata, document.tags, object_id
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
print("Add complete", flush=True)
|
||||
|
||||
async def update_document(self, document):
|
||||
|
||||
print("Updating document", document.id)
|
||||
|
||||
metadata = [
|
||||
(
|
||||
v.s.value, v.s.is_uri, v.p.value, v.p.is_uri,
|
||||
v.o.value, v.o.is_uri
|
||||
)
|
||||
for v in document.metadata
|
||||
]
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.update_document_stmt,
|
||||
(
|
||||
int(document.time * 1000), document.title,
|
||||
document.comments, metadata, document.tags,
|
||||
document.user, document.id
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
print("Update complete", flush=True)
|
||||
|
||||
async def remove_document(self, user, document_id):
|
||||
|
||||
print("Removing document", document_id)
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.delete_document_stmt,
|
||||
(
|
||||
user, document_id
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
print("Delete complete", flush=True)
|
||||
|
||||
async def list_documents(self, user):
|
||||
|
||||
print("List documents...")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.list_document_stmt,
|
||||
(user,)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
lst = [
|
||||
DocumentMetadata(
|
||||
id = row[0],
|
||||
user = user,
|
||||
time = int(time.mktime(row[1].timetuple())),
|
||||
kind = row[2],
|
||||
title = row[3],
|
||||
comments = row[4],
|
||||
metadata = [
|
||||
Triple(
|
||||
s=Value(value=m[0], is_uri=m[1]),
|
||||
p=Value(value=m[2], is_uri=m[3]),
|
||||
o=Value(value=m[4], is_uri=m[5])
|
||||
)
|
||||
for m in row[5]
|
||||
],
|
||||
tags = row[6] if row[6] else [],
|
||||
object_id = row[7],
|
||||
)
|
||||
for row in resp
|
||||
]
|
||||
|
||||
print("Done")
|
||||
|
||||
return lst
|
||||
|
||||
async def get_document(self, user, id):
|
||||
|
||||
print("Get document")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.get_document_stmt,
|
||||
(user, id)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
for row in resp:
|
||||
doc = DocumentMetadata(
|
||||
id = id,
|
||||
user = user,
|
||||
time = int(time.mktime(row[0].timetuple())),
|
||||
kind = row[1],
|
||||
title = row[2],
|
||||
comments = row[3],
|
||||
metadata = [
|
||||
Triple(
|
||||
s=Value(value=m[0], is_uri=m[1]),
|
||||
p=Value(value=m[2], is_uri=m[3]),
|
||||
o=Value(value=m[4], is_uri=m[5])
|
||||
)
|
||||
for m in row[4]
|
||||
],
|
||||
tags = row[5] if row[5] else [],
|
||||
object_id = row[6],
|
||||
)
|
||||
|
||||
print("Done")
|
||||
return doc
|
||||
|
||||
raise RuntimeError("No such document row?")
|
||||
|
||||
async def get_document_object_id(self, user, id):
|
||||
|
||||
print("Get document obj ID")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.get_document_stmt,
|
||||
(user, id)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
for row in resp:
|
||||
print("Done")
|
||||
return row[6]
|
||||
|
||||
raise RuntimeError("No such document row?")
|
||||
|
||||
async def processing_exists(self, user, id):
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.test_processing_exists_stmt,
|
||||
( user, id )
|
||||
)
|
||||
|
||||
# If a row exists, document exists. It's a cursor, can't just
|
||||
# count the length
|
||||
|
||||
for row in resp:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
async def add_processing(self, processing):
|
||||
|
||||
print("Adding processing", processing.id)
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.insert_processing_stmt,
|
||||
(
|
||||
processing.id, processing.document_id,
|
||||
int(processing.time * 1000), processing.flow,
|
||||
processing.user, processing.collection,
|
||||
processing.tags
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
print("Add complete", flush=True)
|
||||
|
||||
async def remove_processing(self, user, processing_id):
|
||||
|
||||
print("Removing processing", processing_id)
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.delete_processing_stmt,
|
||||
(
|
||||
user, processing_id
|
||||
)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
print("Delete complete", flush=True)
|
||||
|
||||
async def list_processing(self, user):
|
||||
|
||||
print("List processing objects")
|
||||
|
||||
while True:
|
||||
|
||||
try:
|
||||
|
||||
resp = self.cassandra.execute(
|
||||
self.list_processing_stmt,
|
||||
(user,)
|
||||
)
|
||||
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", type(e))
|
||||
print(f"{e}, retry...", flush=True)
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
lst = [
|
||||
ProcessingMetadata(
|
||||
id = row[0],
|
||||
document_id = row[1],
|
||||
time = int(time.mktime(row[2].timetuple())),
|
||||
flow = row[3],
|
||||
user = user,
|
||||
collection = row[4],
|
||||
tags = row[5] if row[5] else [],
|
||||
)
|
||||
for row in resp
|
||||
]
|
||||
|
||||
print("Done")
|
||||
|
||||
return lst
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue