Feature / collections (#96)

* Update schema defs for source -> metadata
* Migrate to use the metadata part of the schema; also add metadata to triples & vectors
* Add user/collection metadata to query
* Use user/collection in RAG
* Writing and querying now work on triples
This commit is contained in:
cybermaggedon 2024-10-02 18:14:29 +01:00 committed by GitHub
parent 709221fa10
commit b0f4c58200
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 459 additions and 251 deletions

View file

@ -60,7 +60,10 @@ class Processor(ConsumerProducer):
for vec in v.vectors:
dim = len(vec)
collection = "doc_" + str(dim)
collection = (
"d_" + v.user + "_" + v.collection + "_" +
str(dim)
)
search_result = self.client.query_points(
collection_name=collection,

View file

@ -66,7 +66,10 @@ class Processor(ConsumerProducer):
for vec in v.vectors:
dim = len(vec)
collection = "triples_" + str(dim)
collection = (
"t_" + v.user + "_" + v.collection + "_" +
str(dim)
)
search_result = self.client.query_points(
collection_name=collection,

View file

@ -38,7 +38,8 @@ class Processor(ConsumerProducer):
}
)
self.tg = TrustGraph([graph_host])
self.graph_host = [graph_host]
self.table = None
def create_value(self, ent):
if ent.startswith("http://") or ent.startswith("https://"):
@ -52,6 +53,15 @@ class Processor(ConsumerProducer):
v = msg.value()
table = (v.user, v.collection)
if table != self.table:
self.tg = TrustGraph(
hosts=self.graph_host,
keyspace=v.user, table=v.collection,
)
self.table = table
# Sender-produced ID
id = msg.properties()["id"]