mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 17:06:22 +02:00
Feature / collections (#96)
* Update schema defs for source -> metadata * Migrate to use metadata part of schema, also add metadata to triples & vecs * Add user/collection metadata to query * Use user/collection in RAG * Write and query working on triples
This commit is contained in:
parent
709221fa10
commit
b0f4c58200
31 changed files with 459 additions and 251 deletions
|
|
@ -7,7 +7,7 @@ as text as separate output objects.
|
|||
from langchain_text_splitters import TokenTextSplitter
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from ... schema import TextDocument, Chunk, Source
|
||||
from ... schema import TextDocument, Chunk, Metadata
|
||||
from ... schema import text_ingest_queue, chunk_ingest_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
|
@ -54,7 +54,7 @@ class Processor(ConsumerProducer):
|
|||
def handle(self, msg):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Chunking {v.source.id}...", flush=True)
|
||||
print(f"Chunking {v.metadata.id}...", flush=True)
|
||||
|
||||
texts = self.text_splitter.create_documents(
|
||||
[v.text.decode("utf-8")]
|
||||
|
|
@ -62,13 +62,15 @@ class Processor(ConsumerProducer):
|
|||
|
||||
for ix, chunk in enumerate(texts):
|
||||
|
||||
id = v.source.id + "-c" + str(ix)
|
||||
id = v.metadata.id + "-c" + str(ix)
|
||||
|
||||
r = Chunk(
|
||||
source=Source(
|
||||
source=v.source.source,
|
||||
metadata=Metadata(
|
||||
source=v.metadata.source,
|
||||
id=id,
|
||||
title=v.source.title
|
||||
title=v.metadata.title,
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
chunk=chunk.page_content.encode("utf-8"),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue