Feature: document metadata (#123)

* Rework metadata structure in processing messages to be a subgraph
* Add subgraph creation for tg-load-pdf and tg-load-text based on command-line passing of doc attributes
* Document metadata is added to knowledge graph with subjectOf linkage to extracted entities
This commit is contained in:
cybermaggedon 2024-10-23 18:04:04 +01:00 committed by GitHub
parent b8818e28d0
commit 7954e863cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 625 additions and 98 deletions

View file

@ -0,0 +1,25 @@
# IRI constants for the predicates and classes used when describing document
# metadata as RDF triples. Each name is defined exactly once; values are the
# full IRIs from the RDF, RDFS and schema.org vocabularies.

# RDF / RDFS core vocabulary.
IS_A = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
LABEL = 'http://www.w3.org/2000/01/rdf-schema#label'

# schema.org classes.
DIGITAL_DOCUMENT = 'https://schema.org/DigitalDocument'
PUBLICATION_EVENT = 'https://schema.org/PublicationEvent'
ORGANIZATION = 'https://schema.org/Organization'

# schema.org descriptive properties.
NAME = 'https://schema.org/name'
DESCRIPTION = 'https://schema.org/description'
URL = 'https://schema.org/url'
IDENTIFIER = 'https://schema.org/identifier'
# NOTE: the constant name is singular, but the IRI is schema.org's plural
# `keywords` property. The name is kept as-is for existing importers.
KEYWORD = 'https://schema.org/keywords'

# schema.org copyright and licensing properties.
COPYRIGHT_NOTICE = 'https://schema.org/copyrightNotice'
COPYRIGHT_HOLDER = 'https://schema.org/copyrightHolder'
COPYRIGHT_YEAR = 'https://schema.org/copyrightYear'
LICENSE = 'https://schema.org/license'

# schema.org publication properties.
PUBLICATION = 'https://schema.org/publication'
START_DATE = 'https://schema.org/startDate'
END_DATE = 'https://schema.org/endDate'
PUBLISHED_BY = 'https://schema.org/publishedBy'
DATE_PUBLISHED = 'https://schema.org/datePublished'