trustgraph/tests/test-load-pdf

37 lines
720 B
Text
Raw Normal View History

#!/usr/bin/env python3
import pulsar
from pulsar.schema import JsonSchema
import base64
from trustgraph.schema import Document, Metadata
client = pulsar.Client("pulsar://localhost:6650", listener_name="localhost")
prod = client.create_producer(
topic="persistent://tg/flow/document-load:0000",
schema=JsonSchema(Document),
chunking_enabled=True,
)
path = "../sources/Challenger-Report-Vol1.pdf"
with open(path, "rb") as f:
blob = base64.b64encode(f.read()).decode("utf-8")
message = Document(
metadata = Metadata(
id = "00001",
metadata = [],
user="trustgraph",
collection="default",
),
data=blob
)
prod.send(message)
prod.close()
client.close()