Feature / collections (#96)

* Update schema defs for source -> metadata
* Migrate to use metadata part of schema, also add metadata to triples & vecs
* Add user/collection metadata to query
* Use user/collection in RAG
* Write and query working on triples
This commit is contained in:
cybermaggedon 2024-10-02 18:14:29 +01:00 committed by GitHub
parent 709221fa10
commit b0f4c58200
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 459 additions and 251 deletions

View file

@ -4,10 +4,16 @@ from cassandra.auth import PlainTextAuthProvider
class TrustGraph:
def __init__(self, hosts=None):
def __init__(
self, hosts=None,
keyspace="trustgraph", table="default",
):
if hosts is None:
hosts = ["localhost"]
self.keyspace = keyspace
self.table = table
self.cluster = Cluster(hosts)
self.session = self.cluster.connect()
@ -16,26 +22,26 @@ class TrustGraph:
def clear(self):
self.session.execute("""
drop keyspace if exists trustgraph;
self.session.execute(f"""
drop keyspace if exists {self.keyspace};
""");
self.init()
def init(self):
self.session.execute("""
create keyspace if not exists trustgraph
with replication = {
self.session.execute(f"""
create keyspace if not exists {self.keyspace}
with replication = {{
'class' : 'SimpleStrategy',
'replication_factor' : 1
};
}};
""");
self.session.set_keyspace('trustgraph')
self.session.set_keyspace(self.keyspace)
self.session.execute("""
create table if not exists triples (
self.session.execute(f"""
create table if not exists {self.table} (
s text,
p text,
o text,
@ -43,66 +49,66 @@ class TrustGraph:
);
""");
self.session.execute("""
create index if not exists triples_p
ON triples (p);
self.session.execute(f"""
create index if not exists {self.table}_p
ON {self.table} (p);
""");
self.session.execute("""
create index if not exists triples_o
ON triples (o);
self.session.execute(f"""
create index if not exists {self.table}_o
ON {self.table} (o);
""");
def insert(self, s, p, o):
self.session.execute(
"insert into triples (s, p, o) values (%s, %s, %s)",
f"insert into {self.table} (s, p, o) values (%s, %s, %s)",
(s, p, o)
)
def get_all(self, limit=50):
return self.session.execute(
f"select s, p, o from triples limit {limit}"
f"select s, p, o from {self.table} limit {limit}"
)
def get_s(self, s, limit=10):
return self.session.execute(
f"select p, o from triples where s = %s limit {limit}",
f"select p, o from {self.table} where s = %s limit {limit}",
(s,)
)
def get_p(self, p, limit=10):
return self.session.execute(
f"select s, o from triples where p = %s limit {limit}",
f"select s, o from {self.table} where p = %s limit {limit}",
(p,)
)
def get_o(self, o, limit=10):
return self.session.execute(
f"select s, p from triples where o = %s limit {limit}",
f"select s, p from {self.table} where o = %s limit {limit}",
(o,)
)
def get_sp(self, s, p, limit=10):
return self.session.execute(
f"select o from triples where s = %s and p = %s limit {limit}",
f"select o from {self.table} where s = %s and p = %s limit {limit}",
(s, p)
)
def get_po(self, p, o, limit=10):
return self.session.execute(
f"select s from triples where p = %s and o = %s allow filtering limit {limit}",
f"select s from {self.table} where p = %s and o = %s allow filtering limit {limit}",
(p, o)
)
def get_os(self, o, s, limit=10):
return self.session.execute(
f"select p from triples where o = %s and s = %s limit {limit}",
f"select p from {self.table} where o = %s and s = %s limit {limit}",
(o, s)
)
def get_spo(self, s, p, o, limit=10):
return self.session.execute(
f"""select s as x from triples where s = %s and p = %s and o = %s limit {limit}""",
f"""select s as x from {self.table} where s = %s and p = %s and o = %s limit {limit}""",
(s, p, o)
)