mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-27 17:36:23 +02:00
Feature / collections (#96)
* Update schema defs for source -> metadata
* Migrate to use metadata part of schema, also add metadata to triples & vecs
* Add user/collection metadata to query
* Use user/collection in RAG
* Write and query working on triples
This commit is contained in:
parent
709221fa10
commit
b0f4c58200
31 changed files with 459 additions and 251 deletions
|
|
@ -8,7 +8,7 @@ import tempfile
|
|||
import base64
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
|
||||
from ... schema import Document, TextDocument, Source
|
||||
from ... schema import Document, TextDocument, Metadata
|
||||
from ... schema import document_ingest_queue, text_ingest_queue
|
||||
from ... log_level import LogLevel
|
||||
from ... base import ConsumerProducer
|
||||
|
|
@ -45,7 +45,7 @@ class Processor(ConsumerProducer):
|
|||
|
||||
v = msg.value()
|
||||
|
||||
print(f"Decoding {v.source.id}...", flush=True)
|
||||
print(f"Decoding {v.metadata.id}...", flush=True)
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete_on_close=False) as fp:
|
||||
|
||||
|
|
@ -59,12 +59,14 @@ class Processor(ConsumerProducer):
|
|||
|
||||
for ix, page in enumerate(pages):
|
||||
|
||||
id = v.source.id + "-p" + str(ix)
|
||||
id = v.metadata.id + "-p" + str(ix)
|
||||
r = TextDocument(
|
||||
source=Source(
|
||||
source=v.source.source,
|
||||
title=v.source.title,
|
||||
metadata=Metadata(
|
||||
source=v.metadata.source,
|
||||
title=v.metadata.title,
|
||||
id=id,
|
||||
user=v.metadata.user,
|
||||
collection=v.metadata.collection,
|
||||
),
|
||||
text=page.page_content.encode("utf-8"),
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue