Feature: document metadata (#123)

* Rework metadata structure in processing messages to be a subgraph
* Add subgraph creation for tg-load-pdf and tg-load-text, based on document attributes passed on the command line
* Add document metadata to the knowledge graph, with subjectOf linkage to extracted entities
This commit is contained in:
cybermaggedon 2024-10-23 18:04:04 +01:00 committed by GitHub
parent b8818e28d0
commit 7954e863cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 625 additions and 98 deletions

View file

@ -10,7 +10,7 @@ import argparse
import time
from .... direct.cassandra import TrustGraph
from .... schema import Triple
from .... schema import Triple, Triples
from .... schema import triples_store_queue
from .... log_level import LogLevel
from .... base import Consumer
@ -33,7 +33,7 @@ class Processor(Consumer):
**params | {
"input_queue": input_queue,
"subscriber": subscriber,
"input_schema": Triple,
"input_schema": Triples,
"graph_host": graph_host,
}
)
@ -62,12 +62,13 @@ class Processor(Consumer):
raise e
self.table = table
self.tg.insert(
v.s.value,
v.p.value,
v.o.value
)
for t in v.triples:
self.tg.insert(
t.s.value,
t.p.value,
t.o.value
)
@staticmethod
def add_args(parser):