mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-05-01 19:32:38 +02:00
Collection management (#520)
* Tech spec * Refactored Cassandra knowledge graph for single table * Collection management, librarian services to manage metadata and collection deletion
This commit is contained in:
parent
48016d8fb2
commit
13ff7d765d
48 changed files with 2941 additions and 425 deletions
|
|
@ -6,18 +6,18 @@ from ssl import SSLContext, PROTOCOL_TLSv1_2
|
|||
# Global list to track clusters for cleanup
|
||||
_active_clusters = []
|
||||
|
||||
class TrustGraph:
|
||||
class KnowledgeGraph:
|
||||
|
||||
def __init__(
|
||||
self, hosts=None,
|
||||
keyspace="trustgraph", table="default", username=None, password=None
|
||||
keyspace="trustgraph", username=None, password=None
|
||||
):
|
||||
|
||||
if hosts is None:
|
||||
hosts = ["localhost"]
|
||||
|
||||
self.keyspace = keyspace
|
||||
self.table = table
|
||||
self.table = "triples" # Fixed table name for unified schema
|
||||
self.username = username
|
||||
|
||||
if username and password:
|
||||
|
|
@ -55,13 +55,19 @@ class TrustGraph:
|
|||
|
||||
self.session.execute(f"""
|
||||
create table if not exists {self.table} (
|
||||
collection text,
|
||||
s text,
|
||||
p text,
|
||||
o text,
|
||||
PRIMARY KEY (s, p, o)
|
||||
PRIMARY KEY (collection, s, p, o)
|
||||
);
|
||||
""");
|
||||
|
||||
self.session.execute(f"""
|
||||
create index if not exists {self.table}_s
|
||||
ON {self.table} (s);
|
||||
""");
|
||||
|
||||
self.session.execute(f"""
|
||||
create index if not exists {self.table}_p
|
||||
ON {self.table} (p);
|
||||
|
|
@ -72,58 +78,66 @@ class TrustGraph:
|
|||
ON {self.table} (o);
|
||||
""");
|
||||
|
||||
def insert(self, s, p, o):
|
||||
|
||||
def insert(self, collection, s, p, o):
|
||||
|
||||
self.session.execute(
|
||||
f"insert into {self.table} (s, p, o) values (%s, %s, %s)",
|
||||
(s, p, o)
|
||||
f"insert into {self.table} (collection, s, p, o) values (%s, %s, %s, %s)",
|
||||
(collection, s, p, o)
|
||||
)
|
||||
|
||||
def get_all(self, limit=50):
|
||||
def get_all(self, collection, limit=50):
|
||||
return self.session.execute(
|
||||
f"select s, p, o from {self.table} limit {limit}"
|
||||
f"select s, p, o from {self.table} where collection = %s limit {limit}",
|
||||
(collection,)
|
||||
)
|
||||
|
||||
def get_s(self, s, limit=10):
|
||||
def get_s(self, collection, s, limit=10):
|
||||
return self.session.execute(
|
||||
f"select p, o from {self.table} where s = %s limit {limit}",
|
||||
(s,)
|
||||
f"select p, o from {self.table} where collection = %s and s = %s limit {limit}",
|
||||
(collection, s)
|
||||
)
|
||||
|
||||
def get_p(self, p, limit=10):
|
||||
def get_p(self, collection, p, limit=10):
|
||||
return self.session.execute(
|
||||
f"select s, o from {self.table} where p = %s limit {limit}",
|
||||
(p,)
|
||||
f"select s, o from {self.table} where collection = %s and p = %s limit {limit}",
|
||||
(collection, p)
|
||||
)
|
||||
|
||||
def get_o(self, o, limit=10):
|
||||
def get_o(self, collection, o, limit=10):
|
||||
return self.session.execute(
|
||||
f"select s, p from {self.table} where o = %s limit {limit}",
|
||||
(o,)
|
||||
f"select s, p from {self.table} where collection = %s and o = %s limit {limit}",
|
||||
(collection, o)
|
||||
)
|
||||
|
||||
def get_sp(self, s, p, limit=10):
|
||||
def get_sp(self, collection, s, p, limit=10):
|
||||
return self.session.execute(
|
||||
f"select o from {self.table} where s = %s and p = %s limit {limit}",
|
||||
(s, p)
|
||||
f"select o from {self.table} where collection = %s and s = %s and p = %s limit {limit}",
|
||||
(collection, s, p)
|
||||
)
|
||||
|
||||
def get_po(self, p, o, limit=10):
|
||||
def get_po(self, collection, p, o, limit=10):
|
||||
return self.session.execute(
|
||||
f"select s from {self.table} where p = %s and o = %s limit {limit} allow filtering",
|
||||
(p, o)
|
||||
f"select s from {self.table} where collection = %s and p = %s and o = %s limit {limit} allow filtering",
|
||||
(collection, p, o)
|
||||
)
|
||||
|
||||
def get_os(self, o, s, limit=10):
|
||||
def get_os(self, collection, o, s, limit=10):
|
||||
return self.session.execute(
|
||||
f"select p from {self.table} where o = %s and s = %s limit {limit}",
|
||||
(o, s)
|
||||
f"select p from {self.table} where collection = %s and o = %s and s = %s limit {limit} allow filtering",
|
||||
(collection, o, s)
|
||||
)
|
||||
|
||||
def get_spo(self, s, p, o, limit=10):
|
||||
def get_spo(self, collection, s, p, o, limit=10):
|
||||
return self.session.execute(
|
||||
f"""select s as x from {self.table} where s = %s and p = %s and o = %s limit {limit}""",
|
||||
(s, p, o)
|
||||
f"""select s as x from {self.table} where collection = %s and s = %s and p = %s and o = %s limit {limit}""",
|
||||
(collection, s, p, o)
|
||||
)
|
||||
|
||||
def delete_collection(self, collection):
|
||||
"""Delete all triples for a specific collection"""
|
||||
self.session.execute(
|
||||
f"delete from {self.table} where collection = %s",
|
||||
(collection,)
|
||||
)
|
||||
|
||||
def close(self):
|
||||
|
|
@ -6,7 +6,7 @@ import re
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def make_safe_collection_name(user, collection, dimension, prefix):
|
||||
def make_safe_collection_name(user, collection, prefix):
|
||||
"""
|
||||
Create a safe Milvus collection name from user/collection parameters.
|
||||
Milvus only allows letters, numbers, and underscores.
|
||||
|
|
@ -26,7 +26,7 @@ def make_safe_collection_name(user, collection, dimension, prefix):
|
|||
safe_user = sanitize(user)
|
||||
safe_collection = sanitize(collection)
|
||||
|
||||
return f"{prefix}_{safe_user}_{safe_collection}_{dimension}"
|
||||
return f"{prefix}_{safe_user}_{safe_collection}"
|
||||
|
||||
class DocVectors:
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ class DocVectors:
|
|||
|
||||
def init_collection(self, dimension, user, collection):
|
||||
|
||||
collection_name = make_safe_collection_name(user, collection, dimension, self.prefix)
|
||||
collection_name = make_safe_collection_name(user, collection, self.prefix)
|
||||
|
||||
pkey_field = FieldSchema(
|
||||
name="id",
|
||||
|
|
@ -162,3 +162,20 @@ class DocVectors:
|
|||
|
||||
return res
|
||||
|
||||
def delete_collection(self, user, collection):
|
||||
"""Delete a collection for the given user and collection"""
|
||||
collection_name = make_safe_collection_name(user, collection, self.prefix)
|
||||
|
||||
# Check if collection exists
|
||||
if self.client.has_collection(collection_name):
|
||||
# Drop the collection
|
||||
self.client.drop_collection(collection_name)
|
||||
logger.info(f"Deleted Milvus collection: {collection_name}")
|
||||
|
||||
# Remove from our local cache
|
||||
keys_to_remove = [key for key in self.collections.keys() if key[1] == user and key[2] == collection]
|
||||
for key in keys_to_remove:
|
||||
del self.collections[key]
|
||||
else:
|
||||
logger.info(f"Collection {collection_name} does not exist, nothing to delete")
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import re
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def make_safe_collection_name(user, collection, dimension, prefix):
|
||||
def make_safe_collection_name(user, collection, prefix):
|
||||
"""
|
||||
Create a safe Milvus collection name from user/collection parameters.
|
||||
Milvus only allows letters, numbers, and underscores.
|
||||
|
|
@ -26,7 +26,7 @@ def make_safe_collection_name(user, collection, dimension, prefix):
|
|||
safe_user = sanitize(user)
|
||||
safe_collection = sanitize(collection)
|
||||
|
||||
return f"{prefix}_{safe_user}_{safe_collection}_{dimension}"
|
||||
return f"{prefix}_{safe_user}_{safe_collection}"
|
||||
|
||||
class EntityVectors:
|
||||
|
||||
|
|
@ -51,7 +51,7 @@ class EntityVectors:
|
|||
|
||||
def init_collection(self, dimension, user, collection):
|
||||
|
||||
collection_name = make_safe_collection_name(user, collection, dimension, self.prefix)
|
||||
collection_name = make_safe_collection_name(user, collection, self.prefix)
|
||||
|
||||
pkey_field = FieldSchema(
|
||||
name="id",
|
||||
|
|
@ -162,3 +162,20 @@ class EntityVectors:
|
|||
|
||||
return res
|
||||
|
||||
def delete_collection(self, user, collection):
|
||||
"""Delete a collection for the given user and collection"""
|
||||
collection_name = make_safe_collection_name(user, collection, self.prefix)
|
||||
|
||||
# Check if collection exists
|
||||
if self.client.has_collection(collection_name):
|
||||
# Drop the collection
|
||||
self.client.drop_collection(collection_name)
|
||||
logger.info(f"Deleted Milvus collection: {collection_name}")
|
||||
|
||||
# Remove from our local cache
|
||||
keys_to_remove = [key for key in self.collections.keys() if key[1] == user and key[2] == collection]
|
||||
for key in keys_to_remove:
|
||||
del self.collections[key]
|
||||
else:
|
||||
logger.info(f"Collection {collection_name} does not exist, nothing to delete")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue