mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-07-03 06:51:00 +02:00
Feature: document metadata (#123)
* Rework metadata structure in processing messages to be a subgraph * Add subgraph creation for tg-load-pdf and tg-load-text based on command-line passing of doc attributes * Document metadata is added to knowledge graph with subjectOf linkage to extracted entities
This commit is contained in:
parent
b8818e28d0
commit
7954e863cc
21 changed files with 625 additions and 98 deletions
23
trustgraph-base/trustgraph/knowledge/identifier.py
Normal file
23
trustgraph-base/trustgraph/knowledge/identifier.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
|
||||
import uuid
|
||||
import hashlib
|
||||
|
||||
def hash(data):
    """Derive a deterministic, UUID-formatted identifier from *data*.

    The input is hashed with SHA-256 and the digest is condensed into the
    canonical 8-4-4-4-12 UUID string form, so equal inputs always yield
    the same identifier.

    Note: this function intentionally keeps its original name, which
    shadows the ``hash`` builtin for any module that imports it by name.

    Args:
        data: Text or a bytes-like object. ``str`` input is encoded as
            UTF-8 before hashing.

    Returns:
        A 36-character lowercase UUID string.

    Raises:
        TypeError: If *data* is neither ``str`` nor bytes-like (raised by
            ``hashlib.sha256``).
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    # SHA-256 produces a 64-character hex digest (32 bytes).
    digest = hashlib.sha256(data).hexdigest()

    # A UUID holds 32 hex characters (16 bytes), so keep every second
    # character of the digest to halve its length.
    return str(uuid.UUID(digest[::2]))
|
||||
|
||||
def to_uri(pref, id):
    """Build a trustgraph.ai URI from a path prefix and an identifier.

    Args:
        pref: Path segment placed directly after the host name.
        id: Identifier placed after the prefix. It is interpolated as-is,
            with no URL escaping.

    Returns:
        The string ``https://trustgraph.ai/<pref>/<id>``.
    """
    return f"https://trustgraph.ai/{pref}/{id}"
|
||||
|
||||
# Prefix strings; presumably intended as the `pref` argument of to_uri()
# (publication event, organization, document) -- confirm against callers.
PREF_PUBEV = "pubev"
PREF_ORG = "org"
PREF_DOC = "doc"
|
||||
Loading…
Add table
Add a link
Reference in a new issue