mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-06-10 23:35:14 +02:00
feat: complete knowledge core storage — named graphs, provenance, source material (#973)
Implements all three changes from the knowledge-core-completeness tech spec: 1. Named graph field preserved through Cassandra storage (7-element tuple), enabling provenance triples to retain their graph URIs on round-trip. 2. Provenance triples already arrive on triples-input — no routing change needed; Change 1 was sufficient. 3. Source material (library documents) streamed alongside triples and embeddings during core download/upload. The knowledge manager fetches the document hierarchy from the librarian on download and recreates it on upload, preserving the full provenance chain across instances.
This commit is contained in:
parent
aa158e1ba3
commit
6df7471a55
14 changed files with 1347 additions and 15 deletions
|
|
@ -47,6 +47,31 @@ def write_ge(f, data):
|
|||
)
|
||||
f.write(msgpack.packb(msg, use_bin_type=True))
|
||||
|
||||
def write_library_metadata(f, data):
    """Append one library-metadata record to the open binary file *f*.

    The record is emitted as a msgpack-encoded ``("lm", {...})`` pair
    whose dict uses single-letter keys. ``data["id"]`` is mandatory
    (a missing id raises ``KeyError`` before anything is written); all
    other fields fall back to an empty string, or an empty list for
    tags, when absent from *data*.
    """
    # Short keys mirror the long field names of the incoming record.
    record = {
        "i": data["id"],
        "k": data.get("kind", ""),
        "t": data.get("title", ""),
        "p": data.get("parent-id", ""),
        "d": data.get("document-type", ""),
        "c": data.get("comments", ""),
        "g": data.get("tags", []),
    }
    packed = msgpack.packb(("lm", record), use_bin_type=True)
    f.write(packed)
|
||||
|
||||
def write_library_blob(f, data):
    """Append one library-blob record to the open binary file *f*.

    The record is emitted as a msgpack-encoded ``("lb", {...})`` pair
    carrying the document id under ``"i"`` and its content under
    ``"d"``. ``data["id"]`` is mandatory; a missing ``"data"`` entry is
    written as empty bytes.
    """
    record = {
        "i": data["id"],
        "d": data.get("data", b""),
    }
    packed = msgpack.packb(("lb", record), use_bin_type=True)
    f.write(packed)
|
||||
|
||||
def fetch(url, workspace, id, output, token=None):
|
||||
|
||||
api = Api(url=url, token=token, workspace=workspace)
|
||||
|
|
@ -55,6 +80,8 @@ def fetch(url, workspace, id, output, token=None):
|
|||
try:
|
||||
ge = 0
|
||||
t = 0
|
||||
lm = 0
|
||||
lb = 0
|
||||
|
||||
with open(output, "wb") as f:
|
||||
|
||||
|
|
@ -68,7 +95,15 @@ def fetch(url, workspace, id, output, token=None):
|
|||
ge += 1
|
||||
write_ge(f, response["graph-embeddings"])
|
||||
|
||||
print(f"Got: {t} triple, {ge} GE messages.")
|
||||
if "library-metadata" in response:
|
||||
lm += 1
|
||||
write_library_metadata(f, response["library-metadata"])
|
||||
|
||||
if "library-blob" in response:
|
||||
lb += 1
|
||||
write_library_blob(f, response["library-blob"])
|
||||
|
||||
print(f"Got: {t} triple, {ge} GE, {lm} library metadata, {lb} library blob messages.")
|
||||
|
||||
finally:
|
||||
socket.close()
|
||||
|
|
|
|||
|
|
@ -40,6 +40,23 @@ def read_message(unpacked, id):
|
|||
},
|
||||
"triples": msg["t"],
|
||||
}
|
||||
elif unpacked[0] == "lm":
|
||||
msg = unpacked[1]
|
||||
return "lm", {
|
||||
"id": msg["i"],
|
||||
"kind": msg.get("k", ""),
|
||||
"title": msg.get("t", ""),
|
||||
"parent-id": msg.get("p", ""),
|
||||
"document-type": msg.get("d", ""),
|
||||
"comments": msg.get("c", ""),
|
||||
"tags": msg.get("g", []),
|
||||
}
|
||||
elif unpacked[0] == "lb":
|
||||
msg = unpacked[1]
|
||||
return "lb", {
|
||||
"id": msg["i"],
|
||||
"data": msg.get("d", b""),
|
||||
}
|
||||
else:
|
||||
raise RuntimeError("Unpacked unexpected messsage type", unpacked[0])
|
||||
|
||||
|
|
@ -51,6 +68,8 @@ def put(url, workspace, id, input, token=None):
|
|||
try:
|
||||
ge = 0
|
||||
t = 0
|
||||
lm = 0
|
||||
lb = 0
|
||||
|
||||
with open(input, "rb") as f:
|
||||
|
||||
|
|
@ -73,10 +92,18 @@ def put(url, workspace, id, input, token=None):
|
|||
t += 1
|
||||
socket.put_kg_core(id, triples=msg)
|
||||
|
||||
elif kind == "lm":
|
||||
lm += 1
|
||||
socket.put_kg_core(id, library_metadata=msg)
|
||||
|
||||
elif kind == "lb":
|
||||
lb += 1
|
||||
socket.put_kg_core(id, library_blob=msg)
|
||||
|
||||
else:
|
||||
raise RuntimeError("Unexpected message kind", kind)
|
||||
|
||||
print(f"Put: {t} triple, {ge} GE messages.")
|
||||
print(f"Put: {t} triple, {ge} GE, {lm} library metadata, {lb} library blob messages.")
|
||||
|
||||
finally:
|
||||
socket.close()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue