Feature/refactor entity embeddings (#235)

* Make schema changes
* Core entity context flow in place
* extract-def outputs entity contexts
* Refactored qdrant write
* Refactoring of all vector stores in place
This commit is contained in:
cybermaggedon 2024-12-30 12:53:19 +00:00 committed by GitHub
parent 9942f63773
commit a458d57af2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 230 additions and 169 deletions

View file

@ -35,17 +35,6 @@ chunk_ingest_queue = topic('chunk-load')
############################################################################
# Chunk embeddings are embeddings associated with a text chunk
class ChunkEmbeddings(Record):
# Message metadata (Metadata is defined outside this view).
metadata = Metadata()
# List of embedding vectors; each vector is an array of doubles.
vectors = Array(Array(Double()))
# The chunk itself, carried as raw bytes.
chunk = Bytes()
# Topic named 'chunk-embeddings-load'; presumably carries ChunkEmbeddings
# messages (topic() is defined outside this view -- confirm).
chunk_embeddings_ingest_queue = topic('chunk-embeddings-load')
############################################################################
# Doc embeddings query
class DocumentEmbeddingsRequest(Record):
@ -62,3 +51,4 @@ document_embeddings_request_queue = topic(
# Topic named 'doc-embeddings', created with kind='non-persistent' in the
# 'response' namespace; presumably carries replies to document embeddings
# queries (topic() is defined outside this view -- confirm).
document_embeddings_response_queue = topic(
'doc-embeddings', kind='non-persistent', namespace='response',
)

View file

@ -7,12 +7,31 @@ from . metadata import Metadata
############################################################################
# An entity context is an entity associated with textual context
class EntityContext(Record):
# The entity, as a Value (Value is defined outside this view).
entity = Value()
# Text associated with the entity.
context = String()
# This is a 'batching' mechanism for the above data
class EntityContexts(Record):
# Message metadata, declared once for the whole batch.
metadata = Metadata()
# The batch: an array of EntityContext records.
entities = Array(EntityContext())
# Topic named 'entity-contexts-load'; presumably carries EntityContexts
# messages (topic() is defined outside this view -- confirm).
entity_contexts_ingest_queue = topic('entity-contexts-load')
############################################################################
# Graph embeddings are embeddings associated with a graph entity
class EntityEmbeddings(Record):
# The graph entity, as a Value (Value is defined outside this view).
entity = Value()
# List of embedding vectors; each vector is an array of doubles.
vectors = Array(Array(Double()))
# This is a 'batching' mechanism for the above data
class GraphEmbeddings(Record):
# NOTE(review): this view shows top-level `vectors` and `entity` fields
# alongside the `entities` array. The text looks like a diff whose +/-
# markers were stripped, so presumably some of these lines are removed and
# others added -- confirm against the actual file before relying on it.
# Message metadata.
metadata = Metadata()
# List of embedding vectors; each vector is an array of doubles.
vectors = Array(Array(Double()))
# A single entity, as a Value (Value is defined outside this view).
entity = Value()
# An array of EntityEmbeddings records.
entities = Array(EntityEmbeddings())
# Topic named 'graph-embeddings-store'; presumably carries GraphEmbeddings
# messages (topic() is defined outside this view -- confirm).
graph_embeddings_store_queue = topic('graph-embeddings-store')