Feature: document metadata (#123)

* Rework metadata structure in processing messages to be a subgraph
* Add subgraph creation to tg-load-pdf and tg-load-text, built from document attributes passed on the command line
* Document metadata is added to knowledge graph with subjectOf linkage to extracted entities
This commit is contained in:
cybermaggedon 2024-10-23 18:04:04 +01:00 committed by GitHub
parent b8818e28d0
commit 7954e863cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 625 additions and 98 deletions

View file

@ -0,0 +1,40 @@
from . defs import *
from .. schema import Triple, Value
class Organization:
    """An organization that can be written out as a set of triples.

    The constructor only stores its arguments; ``emit`` converts them into
    ``Triple`` objects and hands each one to a caller-supplied callable.
    """

    def __init__(self, id, name=None, description=None):
        """Store the organization's identifier and optional attributes.

        Args:
            id: Identifier used as the subject of every emitted triple; it
                is always emitted with ``is_uri=True``.
            name: Optional name, emitted as a non-URI value.
            description: Optional description, emitted as a non-URI value.
        """
        self.id = id
        self.name = name
        self.description = description

    def emit(self, emit):
        """Pass this organization's triples to ``emit``, one call per triple.

        The IS_A / ORGANIZATION triple is always emitted. A truthy ``name``
        adds two triples (LABEL and NAME) that both carry the name, and a
        truthy ``description`` adds a DESCRIPTION triple. ``None`` and empty
        strings are skipped by the truthiness checks.

        Args:
            emit: Callable invoked with a single ``Triple`` argument.
        """
        # Type assertion: emitted unconditionally.
        emit(Triple(
            s=Value(value=self.id, is_uri=True),
            p=Value(value=IS_A, is_uri=True),
            o=Value(value=ORGANIZATION, is_uri=True)
        ))

        if self.name:
            # The name is published under two predicates, LABEL and NAME.
            emit(Triple(
                s=Value(value=self.id, is_uri=True),
                p=Value(value=LABEL, is_uri=True),
                o=Value(value=self.name, is_uri=False)
            ))
            emit(Triple(
                s=Value(value=self.id, is_uri=True),
                p=Value(value=NAME, is_uri=True),
                o=Value(value=self.name, is_uri=False)
            ))

        # NOTE(review): treated as independent of the `name` check above
        # (a description is emitted even when no name is set) -- confirm.
        if self.description:
            emit(Triple(
                s=Value(value=self.id, is_uri=True),
                p=Value(value=DESCRIPTION, is_uri=True),
                o=Value(value=self.description, is_uri=False)
            ))