mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-26 17:06:22 +02:00
Feature/pkgsplit (#83)
* Starting to spawn base package * More package hacking * Bedrock and VertexAI * Parquet split * Updated templates * Utils
This commit is contained in:
parent
3fb75c617b
commit
9b91d5eee3
262 changed files with 630 additions and 420 deletions
6
trustgraph-base/trustgraph/base/__init__.py
Normal file
6
trustgraph-base/trustgraph/base/__init__.py
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
|
||||
from . base_processor import BaseProcessor
|
||||
from . consumer import Consumer
|
||||
from . producer import Producer
|
||||
from . consumer_producer import ConsumerProducer
|
||||
|
||||
119
trustgraph-base/trustgraph/base/base_processor.py
Normal file
119
trustgraph-base/trustgraph/base/base_processor.py
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
|
||||
import os
|
||||
import argparse
|
||||
import pulsar
|
||||
import _pulsar
|
||||
import time
|
||||
from prometheus_client import start_http_server, Info
|
||||
|
||||
from .. log_level import LogLevel
|
||||
|
||||
class BaseProcessor:
    """
    Common base class for pipeline processors.  Owns the Pulsar client
    connection, publishes the configuration as a Prometheus Info metric,
    registers the shared command-line arguments, and provides a
    retry-forever entry point (start).
    """

    # Default broker address; overridable via the PULSAR_HOST env var.
    default_pulsar_host = os.getenv("PULSAR_HOST", 'pulsar://pulsar:6650')

    def __init__(self, **params):
        """
        params: pulsar_host (str) and log_level (LogLevel), plus any
        subclass-specific settings; all of them are exposed through the
        'params' Info metric.
        """

        # Assigned before anything that can raise, so __del__ is safe
        # even when connection setup fails part-way through.
        self.client = None

        # Metrics are process-wide: create them once, on the class.
        if not hasattr(__class__, "params_metric"):
            __class__.params_metric = Info(
                'params', 'Parameters configuration'
            )

        # FIXME: Maybe outputs information it should not
        __class__.params_metric.info({
            k: str(params[k])
            for k in params
        })

        pulsar_host = params.get("pulsar_host", self.default_pulsar_host)
        log_level = params.get("log_level", LogLevel.INFO)

        self.pulsar_host = pulsar_host

        self.client = pulsar.Client(
            pulsar_host,
            logger=pulsar.ConsoleLogger(log_level.to_pulsar())
        )

    def __del__(self):
        # Best-effort close of the Pulsar connection, if one was opened.
        if self.client:
            self.client.close()

    @staticmethod
    def add_args(parser):
        """Register the command-line arguments shared by all processors."""

        parser.add_argument(
            '-p', '--pulsar-host',
            default=__class__.default_pulsar_host,
            help=f'Pulsar host (default: {__class__.default_pulsar_host})',
        )

        parser.add_argument(
            '-l', '--log-level',
            type=LogLevel,
            default=LogLevel.INFO,
            choices=list(LogLevel),
            # Fixed: this was previously mislabelled 'Output queue'.
            help='Log level (default: info)'
        )

        parser.add_argument(
            '--metrics',
            action=argparse.BooleanOptionalAction,
            default=True,
            help='Metrics enabled (default: true)',
        )

        parser.add_argument(
            '-P', '--metrics-port',
            type=int,
            default=8000,
            # Fixed: this was previously mislabelled 'Pulsar host'.
            help='Metrics port (default: 8000)',
        )

    def run(self):
        # Subclasses must override this with their processing loop.
        raise RuntimeError("Something should have implemented the run method")

    @classmethod
    def start(cls, prog, doc):
        """
        Parse the command line, optionally start the metrics HTTP
        endpoint, then construct and run the processor forever,
        retrying after any unexpected failure.
        """

        parser = argparse.ArgumentParser(
            prog=prog,
            description=doc
        )

        cls.add_args(parser)

        args = parser.parse_args()
        args = vars(args)

        # NOTE(review): this may echo sensitive configuration to stdout.
        print(args)

        if args["metrics"]:
            start_http_server(args["metrics_port"])

        while True:

            try:

                p = cls(**args)
                p.run()

            except KeyboardInterrupt:
                print("Keyboard interrupt.")
                return

            except _pulsar.Interrupted:
                print("Pulsar Interrupted.")
                return

            except Exception as e:

                print(type(e))

                print("Exception:", e, flush=True)
                print("Will retry...", flush=True)

                # Back off briefly before reconnecting.
                time.sleep(4)
|
||||
107
trustgraph-base/trustgraph/base/consumer.py
Normal file
107
trustgraph-base/trustgraph/base/consumer.py
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Histogram, Info, Counter, Enum
|
||||
import time
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
class Consumer(BaseProcessor):
    """
    Base class for processors that consume from a single Pulsar queue.
    Subclasses implement handle(msg); each message is acknowledged on
    success and negatively acknowledged (for redelivery) on failure.
    """

    def __init__(self, **params):
        """
        params: input_queue, subscriber and input_schema (required),
        plus everything BaseProcessor accepts.

        Raises RuntimeError if input_schema is not supplied.
        """

        # Metrics are process-wide: create them once, on the class.
        if not hasattr(__class__, "state_metric"):
            __class__.state_metric = Enum(
                'processor_state', 'Processor state',
                states=['starting', 'running', 'stopped']
            )

        # Fixed: this was previously invoked twice (once inside the
        # guard above and once here); a single call is sufficient.
        __class__.state_metric.state('starting')

        super(Consumer, self).__init__(**params)

        input_queue = params.get("input_queue")
        subscriber = params.get("subscriber")
        input_schema = params.get("input_schema")

        if input_schema is None:
            raise RuntimeError("input_schema must be specified")

        if not hasattr(__class__, "request_metric"):
            __class__.request_metric = Histogram(
                'request_latency', 'Request latency (seconds)'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        if not hasattr(__class__, "processing_metric"):
            __class__.processing_metric = Counter(
                'processing_count', 'Processing count', ["status"]
            )

        __class__.pubsub_metric.info({
            "input_queue": input_queue,
            "subscriber": subscriber,
            "input_schema": input_schema.__name__,
        })

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(input_schema),
        )

    def run(self):
        """Receive and handle messages forever."""

        __class__.state_metric.state('running')

        while True:

            msg = self.consumer.receive()

            try:

                # Time each handle() call into the latency histogram.
                with __class__.request_metric.time():
                    self.handle(msg)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

                __class__.processing_metric.labels(status="success").inc()

            except TooManyRequests:
                # Rate-limited downstream: requeue and back off.
                self.consumer.negative_acknowledge(msg)
                print("TooManyRequests: will retry")
                __class__.processing_metric.labels(status="rate-limit").inc()
                time.sleep(5)
                continue

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed
                self.consumer.negative_acknowledge(msg)

                __class__.processing_metric.labels(status="error").inc()

    @staticmethod
    def add_args(parser, default_input_queue, default_subscriber):
        """Register consumer arguments on top of the base-class ones."""

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-i', '--input-queue',
            default=default_input_queue,
            help=f'Input queue (default: {default_input_queue})'
        )

        parser.add_argument(
            '-s', '--subscriber',
            default=default_subscriber,
            help=f'Queue subscriber name (default: {default_subscriber})'
        )
|
||||
|
||||
139
trustgraph-base/trustgraph/base/consumer_producer.py
Normal file
139
trustgraph-base/trustgraph/base/consumer_producer.py
Normal file
|
|
@ -0,0 +1,139 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Histogram, Info, Counter, Enum
|
||||
import time
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# FIXME: Derive from consumer? And producer?
|
||||
|
||||
# FIXME: Derive from consumer? And producer?

class ConsumerProducer(BaseProcessor):
    """
    Base class for processors that consume from one Pulsar queue and
    produce to another.  Subclasses implement handle(msg) and may call
    send() to emit results.
    """

    def __init__(self, **params):
        """
        params: input_queue, output_queue, subscriber, input_schema and
        output_schema (the schemas are required), plus everything
        BaseProcessor accepts.

        Raises RuntimeError if either schema is not supplied.
        """

        # Metrics are process-wide: create them once, on the class.
        if not hasattr(__class__, "state_metric"):
            __class__.state_metric = Enum(
                'processor_state', 'Processor state',
                states=['starting', 'running', 'stopped']
            )

        # Fixed: this was previously invoked twice (once inside the
        # guard above and once here); a single call is sufficient.
        __class__.state_metric.state('starting')

        input_queue = params.get("input_queue")
        output_queue = params.get("output_queue")
        subscriber = params.get("subscriber")
        input_schema = params.get("input_schema")
        output_schema = params.get("output_schema")

        # Validate before the schemas' __name__ attributes are used
        # below; previously a missing schema raised AttributeError
        # instead of this intended RuntimeError.
        if input_schema is None:
            raise RuntimeError("input_schema must be specified")

        if output_schema is None:
            raise RuntimeError("output_schema must be specified")

        if not hasattr(__class__, "request_metric"):
            __class__.request_metric = Histogram(
                'request_latency', 'Request latency (seconds)'
            )

        if not hasattr(__class__, "output_metric"):
            __class__.output_metric = Counter(
                'output_count', 'Output items created'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        if not hasattr(__class__, "processing_metric"):
            __class__.processing_metric = Counter(
                'processing_count', 'Processing count', ["status"]
            )

        __class__.pubsub_metric.info({
            "input_queue": input_queue,
            "output_queue": output_queue,
            "subscriber": subscriber,
            "input_schema": input_schema.__name__,
            "output_schema": output_schema.__name__,
        })

        super(ConsumerProducer, self).__init__(**params)

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(output_schema),
        )

        self.consumer = self.client.subscribe(
            input_queue, subscriber,
            schema=JsonSchema(input_schema),
        )

    def run(self):
        """Receive and handle messages forever."""

        __class__.state_metric.state('running')

        while True:

            msg = self.consumer.receive()

            try:

                # Time each handle() call into the latency histogram.
                with __class__.request_metric.time():
                    resp = self.handle(msg)

                # Acknowledge successful processing of the message
                self.consumer.acknowledge(msg)

                __class__.processing_metric.labels(status="success").inc()

            except TooManyRequests:
                # Rate-limited downstream: requeue and back off.
                self.consumer.negative_acknowledge(msg)
                print("TooManyRequests: will retry")
                __class__.processing_metric.labels(status="rate-limit").inc()
                time.sleep(5)
                continue

            except Exception as e:

                print("Exception:", e, flush=True)

                # Message failed to be processed
                self.consumer.negative_acknowledge(msg)

                __class__.processing_metric.labels(status="error").inc()

    # NOTE(review): mutable default is never mutated here, so it is
    # harmless, but properties=None would be the safer idiom.
    def send(self, msg, properties={}):
        """Publish msg to the output queue and count it."""
        self.producer.send(msg, properties)
        __class__.output_metric.inc()

    @staticmethod
    def add_args(
            parser, default_input_queue, default_subscriber,
            default_output_queue,
    ):
        """Register consumer/producer arguments on top of the base ones."""

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-i', '--input-queue',
            default=default_input_queue,
            help=f'Input queue (default: {default_input_queue})'
        )

        parser.add_argument(
            '-s', '--subscriber',
            default=default_subscriber,
            help=f'Queue subscriber name (default: {default_subscriber})'
        )

        parser.add_argument(
            '-o', '--output-queue',
            default=default_output_queue,
            help=f'Output queue (default: {default_output_queue})'
        )
|
||||
|
||||
55
trustgraph-base/trustgraph/base/producer.py
Normal file
55
trustgraph-base/trustgraph/base/producer.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
from prometheus_client import Info, Counter
|
||||
|
||||
from . base_processor import BaseProcessor
|
||||
|
||||
class Producer(BaseProcessor):
    """
    Base class for processors that only publish to a Pulsar queue.
    Subclasses call send() to emit messages.
    """

    def __init__(self, **params):
        """
        params: output_queue and output_schema (required), plus
        everything BaseProcessor accepts.

        Raises RuntimeError if output_schema is not supplied.
        """

        output_queue = params.get("output_queue")
        output_schema = params.get("output_schema")

        # Validate before output_schema.__name__ is used below;
        # previously a missing schema raised AttributeError instead
        # of this intended RuntimeError.
        if output_schema is None:
            raise RuntimeError("output_schema must be specified")

        # Metrics are process-wide: create them once, on the class.
        if not hasattr(__class__, "output_metric"):
            __class__.output_metric = Counter(
                'output_count', 'Output items created'
            )

        if not hasattr(__class__, "pubsub_metric"):
            __class__.pubsub_metric = Info(
                'pubsub', 'Pub/sub configuration'
            )

        __class__.pubsub_metric.info({
            "output_queue": output_queue,
            "output_schema": output_schema.__name__,
        })

        super(Producer, self).__init__(**params)

        self.producer = self.client.create_producer(
            topic=output_queue,
            schema=JsonSchema(output_schema),
        )

    # NOTE(review): mutable default is never mutated here, so it is
    # harmless, but properties=None would be the safer idiom.
    def send(self, msg, properties={}):
        """Publish msg to the output queue and count it."""
        self.producer.send(msg, properties)
        __class__.output_metric.inc()

    # NOTE(review): default_input_queue and default_subscriber are
    # unused (copy-paste from the consumer variants); kept so existing
    # callers' positional arguments still line up.
    @staticmethod
    def add_args(
            parser, default_input_queue, default_subscriber,
            default_output_queue,
    ):
        """Register producer arguments on top of the base-class ones."""

        BaseProcessor.add_args(parser)

        parser.add_argument(
            '-o', '--output-queue',
            default=default_output_queue,
            help=f'Output queue (default: {default_output_queue})'
        )
|
||||
Loading…
Add table
Add a link
Reference in a new issue