Feature / collections (#96)

* Update schema defs for source -> metadata
* Migrate to use metadata part of schema, also add metadata to triples & vecs
* Add user/collection metadata to query
* Use user/collection in RAG
* Write and query working on triples
This commit is contained in:
cybermaggedon 2024-10-02 18:14:29 +01:00 committed by GitHub
parent 709221fa10
commit b0f4c58200
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 459 additions and 251 deletions

View file

@ -38,8 +38,12 @@ class GraphEmbeddingsClient(BaseClient):
output_schema=GraphEmbeddingsResponse,
)
# Issue a graph-embeddings query and return the `entities` attribute of the
# value produced by self.call().
#
# Parameters (post-change signature): `vectors`; `user` (default
# "trustgraph"); `collection` (default "default"); `limit` (default 10);
# `timeout` (default 300). All five are forwarded to self.call() as keyword
# arguments of the same name.
#
# NOTE(review): this text is a rendered diff with its +/- markers and
# indentation stripped. Going by the hunk header counts (-38,8 +38,12), the
# single-line `def request(self, vectors, limit=10, timeout=300):` is the
# removed signature, and the multi-line `def request(` plus the
# `user=user, collection=collection,` argument line are the additions.
def request(self, vectors, limit=10, timeout=300):
def request(
self, vectors, user="trustgraph", collection="default",
limit=10, timeout=300
):
return self.call(
user=user, collection=collection,
vectors=vectors, limit=limit, timeout=timeout
).entities

View file

@ -38,9 +38,12 @@ class GraphRagClient(BaseClient):
output_schema=GraphRagResponse,
)
# Issue a graph-RAG query and return the `response` attribute of the value
# produced by self.call().
#
# Parameters (post-change signature): `query`; `user` (default "trustgraph");
# `collection` (default "default"); `timeout` (default 500). All four are
# forwarded to self.call() as keyword arguments of the same name.
#
# NOTE(review): this text is a rendered diff with its +/- markers and
# indentation stripped. Going by the hunk header counts (-38,9 +38,12), the
# single-line `def request(self, query, timeout=500):` and the argument line
# `query=query, timeout=timeout` are the removed lines; the multi-line
# signature and the argument line beginning `user=user,` replace them.
def request(self, query, timeout=500):
def request(
self, query, user="trustgraph", collection="default",
timeout=500
):
return self.call(
query=query, timeout=timeout
user=user, collection=collection, query=query, timeout=timeout
).response

View file

@ -48,11 +48,18 @@ class TriplesQueryClient(BaseClient):
return Value(value=ent, is_uri=False)
# Issue a triples query and return the `triples` attribute of the value
# produced by self.call().
#
# Parameters (post-change signature): `s`, `p`, `o` are each passed through
# self.create_value() before being sent (create_value is only partly visible
# in this view); `user` (default "trustgraph"); `collection` (default
# "default"); `limit` (default 10); `timeout` (default 60).
#
# NOTE(review): this text is a rendered diff with its +/- markers and
# indentation stripped. Going by the hunk header counts (-48,11 +48,18), the
# single-line `def request(self, s, p, o, limit=10, timeout=60):` is the
# removed signature; the multi-line signature and the `user=user,` /
# `collection=collection,` argument lines are the additions.
def request(self, s, p, o, limit=10, timeout=60):
def request(
self,
s, p, o,
user="trustgraph", collection="default",
limit=10, timeout=60,
):
return self.call(
s=self.create_value(s),
p=self.create_value(p),
o=self.create_value(o),
user=user,
collection=collection,
limit=limit,
timeout=timeout,
).triples

View file

@ -7,6 +7,7 @@ from . object import *
from . topic import *
from . graph import *
from . retrieval import *
from . metadata import *

View file

@ -2,17 +2,13 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
from . topic import topic
from . types import Error
# Record with three string fields: `source`, `id` and `title`.
# NOTE(review): in this rendered diff the class appears to be on the removed
# side; the other hunks swap `source = Source()` for `metadata = Metadata()`.
# Confirm against the committed file before relying on it.
class Source(Record):
source = String()
id = String()
title = String()
from . metadata import Metadata
############################################################################
# PDF docs etc.
# Record pairing a document's bytes (`data`) with a descriptive sub-record.
# NOTE(review): rendered diff with +/- markers stripped. `source = Source()`
# looks like the removed field and `metadata = Metadata()` its replacement;
# the committed class presumably declares only one of them.
class Document(Record):
source = Source()
metadata = Metadata()
data = Bytes()
document_ingest_queue = topic('document-load')
@ -22,7 +18,7 @@ document_ingest_queue = topic('document-load')
# Text documents / text from PDF
# Record pairing document text, held as bytes in `text`, with a descriptive
# sub-record.
# NOTE(review): rendered diff with +/- markers stripped. `source = Source()`
# looks like the removed field and `metadata = Metadata()` its replacement;
# the committed class presumably declares only one of them.
class TextDocument(Record):
source = Source()
metadata = Metadata()
text = Bytes()
text_ingest_queue = topic('text-document-load')
@ -32,7 +28,7 @@ text_ingest_queue = topic('text-document-load')
# Chunks of text
# Record pairing one chunk of text, held as bytes in `chunk`, with a
# descriptive sub-record.
# NOTE(review): rendered diff with +/- markers stripped. `source = Source()`
# looks like the removed field and `metadata = Metadata()` its replacement;
# the committed class presumably declares only one of them.
class Chunk(Record):
source = Source()
metadata = Metadata()
chunk = Bytes()
chunk_ingest_queue = topic('chunk-load')
@ -42,7 +38,7 @@ chunk_ingest_queue = topic('chunk-load')
# Chunk embeddings are an embeddings associated with a text chunk
# Record associating a text chunk (`chunk`, bytes) with `vectors`, declared
# as an array of arrays of doubles, plus a descriptive sub-record.
# NOTE(review): rendered diff with +/- markers stripped. `source = Source()`
# looks like the removed field and `metadata = Metadata()` its replacement;
# the committed class presumably declares only one of them.
class ChunkEmbeddings(Record):
source = Source()
metadata = Metadata()
vectors = Array(Array(Double()))
chunk = Bytes()

View file

@ -1,16 +1,16 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array, Double
from . documents import Source
from . types import Error, Value
from . topic import topic
from . metadata import Metadata
############################################################################
# Graph embeddings are embeddings associated with a graph entity
# Record associating a graph entity (`entity`, a Value) with `vectors`,
# declared as an array of arrays of doubles, plus a descriptive sub-record.
# NOTE(review): rendered diff with +/- markers stripped. `source = Source()`
# looks like the removed field and `metadata = Metadata()` its replacement.
# Only the fields visible in this hunk are described here.
class GraphEmbeddings(Record):
source = Source()
metadata = Metadata()
vectors = Array(Array(Double()))
entity = Value()
@ -23,6 +23,8 @@ graph_embeddings_store_queue = topic('graph-embeddings-store')
# Request record for a graph-embeddings query.
#   vectors    - array of arrays of doubles
#   limit      - integer
#   user       - string
#   collection - string
# NOTE(review): going by the hunk header (-23,6 +23,8), `user` and
# `collection` are the two fields added by this change.
class GraphEmbeddingsRequest(Record):
vectors = Array(Array(Double()))
limit = Integer()
user = String()
collection = String()
class GraphEmbeddingsResponse(Record):
error = Error()
@ -40,7 +42,7 @@ graph_embeddings_response_queue = topic(
# Graph triples
class Triple(Record):
source = Source()
metadata = Metadata()
s = Value()
p = Value()
o = Value()
@ -56,6 +58,8 @@ class TriplesQueryRequest(Record):
p = Value()
o = Value()
limit = Integer()
user = String()
collection = String()
class TriplesQueryResponse(Record):
error = Error()

View file

@ -0,0 +1,10 @@
from pulsar.schema import Record, String
# Record with five string fields, used by the other schema records as their
# `metadata` field.
#   source, id, title - the same three field names the Source record declares
#   user, collection  - presumably identify the owning user and the
#                       collection the item belongs to; confirm with callers
# NOTE(review): indentation was lost in this rendering; in the real module
# the field lines are the indented class body.
class Metadata(Record):
source = String()
id = String()
title = String()
user = String()
collection = String()

View file

@ -2,7 +2,7 @@
from pulsar.schema import Record, Bytes, String, Boolean, Integer, Array
from pulsar.schema import Double, Map
from . documents import Source
from . metadata import Metadata
from . types import Value, RowSchema
from . topic import topic
@ -12,7 +12,7 @@ from . topic import topic
# object
class ObjectEmbeddings(Record):
source = Source()
metadata = Metadata()
vectors = Array(Array(Double()))
name = String()
key_name = String()
@ -25,7 +25,7 @@ object_embeddings_store_queue = topic('object-embeddings-store')
# Stores rows of information
# Record carrying a batch of rows: `row_schema` (a RowSchema) and `rows`,
# declared as an array of string-valued maps, plus a descriptive sub-record.
# NOTE(review): rendered diff with +/- markers stripped. `source = Source()`
# looks like the removed field and `metadata = Metadata()` its replacement;
# the committed class presumably declares only one of them.
class Rows(Record):
source = Source()
metadata = Metadata()
row_schema = RowSchema()
rows = Array(Map(String()))

View file

@ -9,6 +9,8 @@ from . types import Error, Value
# Request record for a graph-RAG query: three string fields, `query`, `user`
# and `collection`.
# NOTE(review): going by the hunk header (-9,6 +9,8), `user` and `collection`
# are the two fields added by this change.
class GraphRagQuery(Record):
query = String()
user = String()
collection = String()
class GraphRagResponse(Record):
error = Error()
@ -27,6 +29,8 @@ graph_rag_response_queue = topic(
# Request record for a document-RAG query: three string fields, `query`,
# `user` and `collection`.
# NOTE(review): going by the hunk header (-27,6 +29,8), `user` and
# `collection` are the two fields added by this change.
class DocumentRagQuery(Record):
query = String()
user = String()
collection = String()
class DocumentRagResponse(Record):
error = Error()