Feature / collections (#96)

* Update schema defs for source -> metadata
* Migrate to the metadata part of the schema; also add metadata to triples & vectors
* Add user/collection metadata to query
* Use user/collection in RAG
* Write and query working on triples
This commit is contained in:
cybermaggedon 2024-10-02 18:14:29 +01:00 committed by GitHub
parent 709221fa10
commit b0f4c58200
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 459 additions and 251 deletions

View file

@ -37,7 +37,6 @@ class Processor(Consumer):
)
self.last_collection = None
self.last_dim = None
self.client = QdrantClient(url=store_uri)
@ -52,9 +51,12 @@ class Processor(Consumer):
for vec in v.vectors:
dim = len(vec)
collection = "doc_" + str(dim)
collection = (
"d_" + v.metadata.user + "_" + v.metadata.collection + "_" +
str(dim)
)
if dim != self.last_dim:
if collection != self.last_collection:
if not self.client.collection_exists(collection):
@ -70,7 +72,6 @@ class Processor(Consumer):
raise e
self.last_collection = collection
self.last_dim = dim
self.client.upsert(
collection_name=collection,

View file

@ -37,7 +37,6 @@ class Processor(Consumer):
)
self.last_collection = None
self.last_dim = None
self.client = QdrantClient(url=store_uri)
@ -50,9 +49,12 @@ class Processor(Consumer):
for vec in v.vectors:
dim = len(vec)
collection = "triples_" + str(dim)
collection = (
"t_" + v.metadata.user + "_" + v.metadata.collection + "_" +
str(dim)
)
if dim != self.last_dim:
if collection != self.last_collection:
if not self.client.collection_exists(collection):
@ -68,7 +70,6 @@ class Processor(Consumer):
raise e
self.last_collection = collection
self.last_dim = dim
self.client.upsert(
collection_name=collection,

View file

@ -38,12 +38,31 @@ class Processor(Consumer):
}
)
self.tg = TrustGraph([graph_host])
self.graph_host = [graph_host]
self.table = None
def handle(self, msg):
v = msg.value()
table = (v.metadata.user, v.metadata.collection)
if self.table is None or self.table != table:
self.tg = None
try:
self.tg = TrustGraph(
hosts=self.graph_host,
keyspace=v.metadata.user, table=v.metadata.collection,
)
except Exception as e:
print("Exception", e, flush=True)
time.sleep(1)
raise e
self.table = table
self.tg.insert(
v.s.value,
v.p.value,