mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 00:16:23 +02:00
Implement logging strategy (#444)
* Logging strategy and convert all prints() to logging invocations
This commit is contained in:
parent
3e0651222b
commit
dd70aade11
117 changed files with 1216 additions and 667 deletions
169
docs/LOGGING_STRATEGY.md
Normal file
169
docs/LOGGING_STRATEGY.md
Normal file
|
|
@ -0,0 +1,169 @@
|
|||
# TrustGraph Logging Strategy
|
||||
|
||||
## Overview
|
||||
|
||||
TrustGraph uses Python's built-in `logging` module for all logging operations. This provides a standardized, flexible approach to logging across all components of the system.
|
||||
|
||||
## Default Configuration
|
||||
|
||||
### Logging Level
|
||||
- **Default Level**: `INFO`
|
||||
- **Debug Mode**: `DEBUG` (enabled via command-line argument)
|
||||
- **Production**: `WARNING` or `ERROR` as appropriate
|
||||
|
||||
### Output Destination
|
||||
All logs should be written to **standard output (stdout)** to ensure compatibility with containerized environments and log aggregation systems.
|
||||
|
||||
## Implementation Guidelines
|
||||
|
||||
### 1. Logger Initialization
|
||||
|
||||
Each module should create its own logger using the module's `__name__`:
|
||||
|
||||
```python
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
```
|
||||
|
||||
### 2. Centralized Configuration
|
||||
|
||||
The logging configuration should be centralized in `async_processor.py` (or a dedicated logging configuration module) since it's inherited by much of the codebase:
|
||||
|
||||
```python
|
||||
import logging
|
||||
import argparse
|
||||
|
||||
def setup_logging(log_level='INFO'):
|
||||
"""Configure logging for the entire application"""
|
||||
logging.basicConfig(
|
||||
level=getattr(logging, log_level.upper()),
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[logging.StreamHandler()]
|
||||
)
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
'--log-level',
|
||||
default='INFO',
|
||||
choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
|
||||
help='Set the logging level (default: INFO)'
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
# In main execution
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
setup_logging(args.log_level)
|
||||
```
|
||||
|
||||
### 3. Logging Best Practices
|
||||
|
||||
#### Log Levels Usage
|
||||
- **DEBUG**: Detailed information for diagnosing problems (variable values, function entry/exit)
|
||||
- **INFO**: General informational messages (service started, configuration loaded, processing milestones)
|
||||
- **WARNING**: Warning messages for potentially harmful situations (deprecated features, recoverable errors)
|
||||
- **ERROR**: Error messages for serious problems (failed operations, exceptions)
|
||||
- **CRITICAL**: Critical messages for system failures requiring immediate attention
|
||||
|
||||
#### Message Format
|
||||
```python
|
||||
# Good - includes context
|
||||
logger.info(f"Processing document: {doc_id}, size: {doc_size} bytes")
|
||||
logger.error(f"Failed to connect to database: {error}", exc_info=True)
|
||||
|
||||
# Avoid - lacks context
|
||||
logger.info("Processing document")
|
||||
logger.error("Connection failed")
|
||||
```
|
||||
|
||||
#### Performance Considerations
|
||||
```python
|
||||
# Use lazy formatting for expensive operations
|
||||
logger.debug("Expensive operation result: %s", expensive_function())
|
||||
|
||||
# Check log level for very expensive debug operations
|
||||
if logger.isEnabledFor(logging.DEBUG):
|
||||
debug_data = compute_expensive_debug_info()
|
||||
logger.debug(f"Debug data: {debug_data}")
|
||||
```
|
||||
|
||||
### 4. Structured Logging
|
||||
|
||||
For complex data, use structured logging:
|
||||
|
||||
```python
|
||||
logger.info("Request processed", extra={
|
||||
'request_id': request_id,
|
||||
'duration_ms': duration,
|
||||
'status_code': status_code,
|
||||
'user_id': user_id
|
||||
})
|
||||
```
|
||||
|
||||
### 5. Exception Logging
|
||||
|
||||
Always include stack traces for exceptions:
|
||||
|
||||
```python
|
||||
try:
|
||||
process_data()
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process data: {e}", exc_info=True)
|
||||
raise
|
||||
```
|
||||
|
||||
### 6. Async Logging Considerations
|
||||
|
||||
For async code, ensure thread-safe logging:
|
||||
|
||||
```python
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
async def async_operation():
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info(f"Starting async operation in task: {asyncio.current_task().get_name()}")
|
||||
```
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Support environment-based configuration as a fallback:
|
||||
|
||||
```python
|
||||
import os
|
||||
|
||||
log_level = os.environ.get('TRUSTGRAPH_LOG_LEVEL', 'INFO')
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
During tests, consider using a different logging configuration:
|
||||
|
||||
```python
|
||||
# In test setup
|
||||
logging.getLogger().setLevel(logging.WARNING) # Reduce noise during tests
|
||||
```
|
||||
|
||||
## Monitoring Integration
|
||||
|
||||
Ensure log format is compatible with monitoring tools:
|
||||
- Include timestamps in ISO format
|
||||
- Use consistent field names
|
||||
- Include correlation IDs where applicable
|
||||
- Structure logs for easy parsing (JSON format for production)
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- Never log sensitive information (passwords, API keys, personal data)
|
||||
- Sanitize user input before logging
|
||||
- Use placeholders for sensitive fields: `user_id=****1234`
|
||||
|
||||
## Migration Path
|
||||
|
||||
For existing code using print statements:
|
||||
1. Replace `print()` with appropriate logger calls
|
||||
2. Choose appropriate log levels based on message importance
|
||||
3. Add context to make logs more useful
|
||||
4. Test logging output at different levels
|
||||
|
|
@ -248,9 +248,13 @@ class TestDocumentRagIntegration:
|
|||
@pytest.mark.asyncio
|
||||
async def test_document_rag_verbose_logging(self, mock_embeddings_client,
|
||||
mock_doc_embeddings_client, mock_prompt_client,
|
||||
capsys):
|
||||
caplog):
|
||||
"""Test DocumentRAG verbose logging functionality"""
|
||||
# Arrange
|
||||
import logging
|
||||
|
||||
# Arrange - Configure logging to capture debug messages
|
||||
caplog.set_level(logging.DEBUG)
|
||||
|
||||
document_rag = DocumentRag(
|
||||
embeddings_client=mock_embeddings_client,
|
||||
doc_embeddings_client=mock_doc_embeddings_client,
|
||||
|
|
@ -261,14 +265,14 @@ class TestDocumentRagIntegration:
|
|||
# Act
|
||||
await document_rag.query("test query for verbose logging")
|
||||
|
||||
# Assert
|
||||
captured = capsys.readouterr()
|
||||
assert "Initialised" in captured.out
|
||||
assert "Construct prompt..." in captured.out
|
||||
assert "Compute embeddings..." in captured.out
|
||||
assert "Get docs..." in captured.out
|
||||
assert "Invoke LLM..." in captured.out
|
||||
assert "Done" in captured.out
|
||||
# Assert - Check for new logging messages
|
||||
log_messages = caplog.text
|
||||
assert "DocumentRag initialized" in log_messages
|
||||
assert "Constructing prompt..." in log_messages
|
||||
assert "Computing embeddings..." in log_messages
|
||||
assert "Getting documents..." in log_messages
|
||||
assert "Invoking LLM..." in log_messages
|
||||
assert "Query processing complete" in log_messages
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.slow
|
||||
|
|
|
|||
|
|
@ -49,9 +49,6 @@ class Api:
|
|||
|
||||
url = f"{self.url}{path}"
|
||||
|
||||
# print("uri:", url)
|
||||
# print(json.dumps(request, indent=4))
|
||||
|
||||
# Invoke the API, input is passed as JSON
|
||||
resp = requests.post(url, json=request, timeout=self.timeout)
|
||||
|
||||
|
|
@ -59,8 +56,6 @@ class Api:
|
|||
if resp.status_code != 200:
|
||||
raise ProtocolException(f"Status code {resp.status_code}")
|
||||
|
||||
# print(resp.text)
|
||||
|
||||
try:
|
||||
# Parse the response as JSON
|
||||
object = resp.json()
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
|
||||
import logging
|
||||
|
||||
from . exceptions import *
|
||||
from . types import ConfigValue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Config:
|
||||
|
||||
def __init__(self, api):
|
||||
|
|
@ -33,7 +37,7 @@ class Config:
|
|||
for v in object["values"]
|
||||
]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse config get response", exc_info=True)
|
||||
raise ProtocolException("Response not formatted correctly")
|
||||
|
||||
def put(self, values):
|
||||
|
|
|
|||
|
|
@ -2,11 +2,14 @@
|
|||
import datetime
|
||||
import time
|
||||
import base64
|
||||
import logging
|
||||
|
||||
from . types import DocumentMetadata, ProcessingMetadata, Triple
|
||||
from .. knowledge import hash, Uri, Literal
|
||||
from . exceptions import *
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def to_value(x):
|
||||
if x["e"]: return Uri(x["v"])
|
||||
return Literal(x["v"])
|
||||
|
|
@ -112,7 +115,7 @@ class Library:
|
|||
for v in object["document-metadatas"]
|
||||
]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse document list response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def get_document(self, user, id):
|
||||
|
|
@ -145,7 +148,7 @@ class Library:
|
|||
tags = doc["tags"]
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse document response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def update_document(self, user, id, metadata):
|
||||
|
|
@ -192,7 +195,7 @@ class Library:
|
|||
tags = doc["tags"]
|
||||
)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse document update response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
def remove_document(self, user, id):
|
||||
|
|
@ -266,6 +269,6 @@ class Library:
|
|||
for v in object["processing-metadatas"]
|
||||
]
|
||||
except Exception as e:
|
||||
print(e)
|
||||
logger.error("Failed to parse processing list response", exc_info=True)
|
||||
raise ProtocolException(f"Response not formatted correctly")
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Agent manager service completion base class
|
|||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from .. schema import AgentRequest, AgentResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "agent-manager"
|
||||
|
||||
class AgentService(FlowProcessor):
|
||||
|
|
@ -76,9 +80,9 @@ class AgentService(FlowProcessor):
|
|||
except Exception as e:
|
||||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
print(f"on_request Exception: {e}")
|
||||
logger.error(f"Exception in agent service on_request: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
AgentResponse(
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ import argparse
|
|||
import _pulsar
|
||||
import time
|
||||
import uuid
|
||||
import logging
|
||||
import os
|
||||
from prometheus_client import start_http_server, Info
|
||||
|
||||
from .. schema import ConfigPush, config_push_queue
|
||||
|
|
@ -20,6 +22,9 @@ from . metrics import ProcessorMetrics, ConsumerMetrics
|
|||
|
||||
default_config_queue = config_push_queue
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Async processor
|
||||
class AsyncProcessor:
|
||||
|
||||
|
|
@ -113,7 +118,7 @@ class AsyncProcessor:
|
|||
version = message.value().version
|
||||
|
||||
# Invoke message handlers
|
||||
print("Config change event", version, flush=True)
|
||||
logger.info(f"Config change event: version={version}")
|
||||
for ch in self.config_handlers:
|
||||
await ch(config, version)
|
||||
|
||||
|
|
@ -156,9 +161,23 @@ class AsyncProcessor:
|
|||
|
||||
# This is here to output a debug message, shouldn't be needed.
|
||||
except Exception as e:
|
||||
print("Exception, closing taskgroup", flush=True)
|
||||
logger.error("Exception, closing taskgroup", exc_info=True)
|
||||
raise e
|
||||
|
||||
@classmethod
|
||||
def setup_logging(cls, log_level='INFO'):
|
||||
"""Configure logging for the entire application"""
|
||||
# Support environment variable override
|
||||
env_log_level = os.environ.get('TRUSTGRAPH_LOG_LEVEL', log_level)
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=getattr(logging, env_log_level.upper()),
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[logging.StreamHandler()]
|
||||
)
|
||||
logger.info(f"Logging configured with level: {env_log_level}")
|
||||
|
||||
# Startup fabric. launch calls launch_async in async mode.
|
||||
@classmethod
|
||||
def launch(cls, ident, doc):
|
||||
|
|
@ -183,8 +202,11 @@ class AsyncProcessor:
|
|||
args = parser.parse_args()
|
||||
args = vars(args)
|
||||
|
||||
# Setup logging before anything else
|
||||
cls.setup_logging(args.get('log_level', 'INFO').upper())
|
||||
|
||||
# Debug
|
||||
print(args, flush=True)
|
||||
logger.debug(f"Arguments: {args}")
|
||||
|
||||
# Start the Prometheus metrics service if needed
|
||||
if args["metrics"]:
|
||||
|
|
@ -193,7 +215,7 @@ class AsyncProcessor:
|
|||
# Loop forever, exception handler
|
||||
while True:
|
||||
|
||||
print("Starting...", flush=True)
|
||||
logger.info("Starting...")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -203,30 +225,30 @@ class AsyncProcessor:
|
|||
))
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print("Keyboard interrupt.", flush=True)
|
||||
logger.info("Keyboard interrupt.")
|
||||
return
|
||||
|
||||
except _pulsar.Interrupted:
|
||||
print("Pulsar Interrupted.", flush=True)
|
||||
logger.info("Pulsar Interrupted.")
|
||||
return
|
||||
|
||||
# Exceptions from a taskgroup come in as an exception group
|
||||
except ExceptionGroup as e:
|
||||
|
||||
print("Exception group:", flush=True)
|
||||
logger.error("Exception group:")
|
||||
|
||||
for se in e.exceptions:
|
||||
print(" Type:", type(se), flush=True)
|
||||
print(f" Exception: {se}", flush=True)
|
||||
logger.error(f" Type: {type(se)}")
|
||||
logger.error(f" Exception: {se}", exc_info=se)
|
||||
|
||||
except Exception as e:
|
||||
print("Type:", type(e), flush=True)
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Type: {type(e)}")
|
||||
logger.error(f"Exception: {e}", exc_info=True)
|
||||
|
||||
# Retry occurs here
|
||||
print("Will retry...", flush=True)
|
||||
logger.warning("Will retry...")
|
||||
time.sleep(4)
|
||||
print("Retrying...", flush=True)
|
||||
logger.info("Retrying...")
|
||||
|
||||
# The command-line arguments are built using a stack of add_args
|
||||
# invocations
|
||||
|
|
@ -254,3 +276,4 @@ class AsyncProcessor:
|
|||
default=8000,
|
||||
help=f'Pulsar host (default: 8000)',
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,9 +14,13 @@ import pulsar
|
|||
import _pulsar
|
||||
import asyncio
|
||||
import time
|
||||
import logging
|
||||
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Consumer:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -90,7 +94,7 @@ class Consumer:
|
|||
|
||||
try:
|
||||
|
||||
print(self.topic, "subscribing...", flush=True)
|
||||
logger.info(f"Subscribing to topic: {self.topic}")
|
||||
|
||||
if self.start_of_messages:
|
||||
pos = pulsar.InitialPosition.Earliest
|
||||
|
|
@ -108,21 +112,18 @@ class Consumer:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("consumer subs Exception:", e, flush=True)
|
||||
logger.error(f"Consumer subscription exception: {e}", exc_info=True)
|
||||
await asyncio.sleep(self.reconnect_time)
|
||||
continue
|
||||
|
||||
print(self.topic, "subscribed", flush=True)
|
||||
logger.info(f"Successfully subscribed to topic: {self.topic}")
|
||||
|
||||
if self.metrics:
|
||||
self.metrics.state("running")
|
||||
|
||||
try:
|
||||
|
||||
print(
|
||||
"Starting", self.concurrency, "receiver threads",
|
||||
flush=True
|
||||
)
|
||||
logger.info(f"Starting {self.concurrency} receiver threads")
|
||||
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
|
||||
|
|
@ -138,7 +139,7 @@ class Consumer:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("consumer loop exception:", e, flush=True)
|
||||
logger.error(f"Consumer loop exception: {e}", exc_info=True)
|
||||
self.consumer.unsubscribe()
|
||||
self.consumer.close()
|
||||
self.consumer = None
|
||||
|
|
@ -174,7 +175,7 @@ class Consumer:
|
|||
|
||||
if time.time() > expiry:
|
||||
|
||||
print("Gave up waiting for rate-limit retry", flush=True)
|
||||
logger.warning("Gave up waiting for rate-limit retry")
|
||||
|
||||
# Message failed to be processed, this causes it to
|
||||
# be retried
|
||||
|
|
@ -188,7 +189,7 @@ class Consumer:
|
|||
|
||||
try:
|
||||
|
||||
print("Handle...", flush=True)
|
||||
logger.debug("Processing message...")
|
||||
|
||||
if self.metrics:
|
||||
|
||||
|
|
@ -198,7 +199,7 @@ class Consumer:
|
|||
else:
|
||||
await self.handler(msg, self, self.flow)
|
||||
|
||||
print("Handled.", flush=True)
|
||||
logger.debug("Message processed successfully")
|
||||
|
||||
# Acknowledge successful processing of the message
|
||||
self.consumer.acknowledge(msg)
|
||||
|
|
@ -211,7 +212,7 @@ class Consumer:
|
|||
|
||||
except TooManyRequests:
|
||||
|
||||
print("TooManyRequests: will retry...", flush=True)
|
||||
logger.warning("Rate limit exceeded, will retry...")
|
||||
|
||||
if self.metrics:
|
||||
self.metrics.rate_limit()
|
||||
|
|
@ -224,7 +225,7 @@ class Consumer:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("consume exception:", e, flush=True)
|
||||
logger.error(f"Message processing exception: {e}", exc_info=True)
|
||||
|
||||
# Message failed to be processed, this causes it to
|
||||
# be retried
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
|
||||
import logging
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentEmbeddingsClient(RequestResponse):
|
||||
async def query(self, vectors, limit=20, user="trustgraph",
|
||||
collection="default", timeout=30):
|
||||
|
|
@ -17,7 +22,7 @@ class DocumentEmbeddingsClient(RequestResponse):
|
|||
timeout=timeout
|
||||
)
|
||||
|
||||
print(resp, flush=True)
|
||||
logger.debug(f"Document embeddings response: {resp}")
|
||||
|
||||
if resp.error:
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Document embeddings query service. Input is vectors. Output is list of
|
|||
embeddings.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import DocumentEmbeddingsRequest, DocumentEmbeddingsResponse
|
||||
from .. schema import Error, Value
|
||||
|
||||
|
|
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
|
|||
from . consumer_spec import ConsumerSpec
|
||||
from . producer_spec import ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
|
||||
class DocumentEmbeddingsQueryService(FlowProcessor):
|
||||
|
|
@ -47,21 +52,21 @@ class DocumentEmbeddingsQueryService(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.debug(f"Handling document embeddings query request {id}...")
|
||||
|
||||
docs = await self.query_document_embeddings(request)
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending document embeddings query response...")
|
||||
r = DocumentEmbeddingsResponse(documents=docs, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Document embeddings query request completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in document embeddings query service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
r = DocumentEmbeddingsResponse(
|
||||
error=Error(
|
||||
|
|
|
|||
|
|
@ -3,10 +3,15 @@
|
|||
Document embeddings store base class
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import DocumentEmbeddings
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "document-embeddings-write"
|
||||
|
||||
class DocumentEmbeddingsStoreService(FlowProcessor):
|
||||
|
|
@ -40,7 +45,7 @@ class DocumentEmbeddingsStoreService(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in document embeddings store service: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Embeddings resolution base class
|
|||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from .. schema import EmbeddingsRequest, EmbeddingsResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "embeddings"
|
||||
default_concurrency = 1
|
||||
|
||||
|
|
@ -51,7 +55,7 @@ class EmbeddingsService(FlowProcessor):
|
|||
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print("Handling request", id, "...", flush=True)
|
||||
logger.debug(f"Handling embeddings request {id}...")
|
||||
|
||||
vectors = await self.on_embeddings(request.text)
|
||||
|
||||
|
|
@ -63,7 +67,7 @@ class EmbeddingsService(FlowProcessor):
|
|||
properties={"id": id}
|
||||
)
|
||||
|
||||
print("Handled.", flush=True)
|
||||
logger.debug("Embeddings request handled successfully")
|
||||
|
||||
except TooManyRequests as e:
|
||||
raise e
|
||||
|
|
@ -72,9 +76,9 @@ class EmbeddingsService(FlowProcessor):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Exception in embeddings service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
EmbeddingsResponse(
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
# configuration service which can't manage itself.
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
|
|
@ -14,6 +15,9 @@ from .. log_level import LogLevel
|
|||
from . async_processor import AsyncProcessor
|
||||
from . flow import Flow
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Parent class for configurable processors, configured with flows by
|
||||
# the config service
|
||||
class FlowProcessor(AsyncProcessor):
|
||||
|
|
@ -34,7 +38,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
# Array of specifications: ConsumerSpec, ProducerSpec, SettingSpec
|
||||
self.specifications = []
|
||||
|
||||
print("Service initialised.")
|
||||
logger.info("Service initialised.")
|
||||
|
||||
# Register a configuration variable
|
||||
def register_specification(self, spec):
|
||||
|
|
@ -44,19 +48,19 @@ class FlowProcessor(AsyncProcessor):
|
|||
async def start_flow(self, flow, defn):
|
||||
self.flows[flow] = Flow(self.id, flow, self, defn)
|
||||
await self.flows[flow].start()
|
||||
print("Started flow: ", flow)
|
||||
logger.info(f"Started flow: {flow}")
|
||||
|
||||
# Stop processing for a new flow
|
||||
async def stop_flow(self, flow):
|
||||
if flow in self.flows:
|
||||
await self.flows[flow].stop()
|
||||
del self.flows[flow]
|
||||
print("Stopped flow: ", flow, flush=True)
|
||||
logger.info(f"Stopped flow: {flow}")
|
||||
|
||||
# Event handler - called for a configuration change
|
||||
async def on_configure_flows(self, config, version):
|
||||
|
||||
print("Got config version", version, flush=True)
|
||||
logger.info(f"Got config version {version}")
|
||||
|
||||
# Skip over invalid data
|
||||
if "flows-active" not in config: return
|
||||
|
|
@ -69,7 +73,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
|
||||
else:
|
||||
|
||||
print("No configuration settings for me.", flush=True)
|
||||
logger.debug("No configuration settings for me.")
|
||||
flow_config = {}
|
||||
|
||||
# Get list of flows which should be running and are currently
|
||||
|
|
@ -88,7 +92,7 @@ class FlowProcessor(AsyncProcessor):
|
|||
if flow not in wanted_flows:
|
||||
await self.stop_flow(flow)
|
||||
|
||||
print("Handled config update")
|
||||
logger.info("Handled config update")
|
||||
|
||||
# Start threads, just call parent
|
||||
async def start(self):
|
||||
|
|
|
|||
|
|
@ -1,8 +1,13 @@
|
|||
|
||||
import logging
|
||||
|
||||
from . request_response_spec import RequestResponse, RequestResponseSpec
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. knowledge import Uri, Literal
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def to_value(x):
|
||||
if x.is_uri: return Uri(x.value)
|
||||
return Literal(x.value)
|
||||
|
|
@ -21,7 +26,7 @@ class GraphEmbeddingsClient(RequestResponse):
|
|||
timeout=timeout
|
||||
)
|
||||
|
||||
print(resp, flush=True)
|
||||
logger.debug(f"Graph embeddings response: {resp}")
|
||||
|
||||
if resp.error:
|
||||
raise RuntimeError(resp.error.message)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Graph embeddings query service. Input is vectors. Output is list of
|
|||
embeddings.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import GraphEmbeddingsRequest, GraphEmbeddingsResponse
|
||||
from .. schema import Error, Value
|
||||
|
||||
|
|
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
|
|||
from . consumer_spec import ConsumerSpec
|
||||
from . producer_spec import ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
|
||||
class GraphEmbeddingsQueryService(FlowProcessor):
|
||||
|
|
@ -47,21 +52,21 @@ class GraphEmbeddingsQueryService(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.debug(f"Handling graph embeddings query request {id}...")
|
||||
|
||||
entities = await self.query_graph_embeddings(request)
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending graph embeddings query response...")
|
||||
r = GraphEmbeddingsResponse(entities=entities, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Graph embeddings query request completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in graph embeddings query service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
r = GraphEmbeddingsResponse(
|
||||
error=Error(
|
||||
|
|
|
|||
|
|
@ -3,10 +3,15 @@
|
|||
Graph embeddings store base class
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import GraphEmbeddings
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "graph-embeddings-write"
|
||||
|
||||
class GraphEmbeddingsStoreService(FlowProcessor):
|
||||
|
|
@ -40,7 +45,7 @@ class GraphEmbeddingsStoreService(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in graph embeddings store service: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ LLM text completion base class
|
|||
"""
|
||||
|
||||
import time
|
||||
import logging
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from .. schema import TextCompletionRequest, TextCompletionResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "text-completion"
|
||||
default_concurrency = 1
|
||||
|
||||
|
|
@ -103,9 +107,9 @@ class LlmService(FlowProcessor):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"LLM service exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
TextCompletionResponse(
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
|
||||
from pulsar.schema import JsonSchema
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Producer:
|
||||
|
||||
|
|
@ -39,15 +43,15 @@ class Producer:
|
|||
while self.running and self.producer is None:
|
||||
|
||||
try:
|
||||
print("Connect publisher to", self.topic, "...", flush=True)
|
||||
logger.info(f"Connecting publisher to {self.topic}...")
|
||||
self.producer = self.client.create_producer(
|
||||
topic = self.topic,
|
||||
schema = JsonSchema(self.schema),
|
||||
chunking_enabled = self.chunking_enabled,
|
||||
)
|
||||
print("Connected to", self.topic, flush=True)
|
||||
logger.info(f"Connected publisher to {self.topic}")
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Exception connecting publisher: {e}", exc_info=True)
|
||||
await asyncio.sleep(2)
|
||||
|
||||
if not self.running: break
|
||||
|
|
@ -68,7 +72,7 @@ class Producer:
|
|||
break
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Exception sending message: {e}", exc_info=True)
|
||||
self.producer.close()
|
||||
self.producer = None
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@ from pulsar.schema import JsonSchema
|
|||
import asyncio
|
||||
import time
|
||||
import pulsar
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Publisher:
|
||||
|
||||
|
|
@ -62,7 +66,7 @@ class Publisher:
|
|||
producer.send(item)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Exception in publisher: {e}", exc_info=True)
|
||||
|
||||
if not self.running:
|
||||
return
|
||||
|
|
|
|||
|
|
@ -1,12 +1,16 @@
|
|||
|
||||
import uuid
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from . subscriber import Subscriber
|
||||
from . producer import Producer
|
||||
from . spec import Spec
|
||||
from . metrics import ConsumerMetrics, ProducerMetrics, SubscriberMetrics
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RequestResponse(Subscriber):
|
||||
|
||||
def __init__(
|
||||
|
|
@ -45,7 +49,7 @@ class RequestResponse(Subscriber):
|
|||
|
||||
id = str(uuid.uuid4())
|
||||
|
||||
print("Request", id, "...", flush=True)
|
||||
logger.debug(f"Sending request {id}...")
|
||||
|
||||
q = await self.subscribe(id)
|
||||
|
||||
|
|
@ -58,7 +62,7 @@ class RequestResponse(Subscriber):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e)
|
||||
logger.error(f"Exception sending request: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
|
||||
|
|
@ -71,7 +75,7 @@ class RequestResponse(Subscriber):
|
|||
timeout=timeout
|
||||
)
|
||||
|
||||
print("Got response.", flush=True)
|
||||
logger.debug("Received response")
|
||||
|
||||
if recipient is None:
|
||||
|
||||
|
|
@ -93,7 +97,7 @@ class RequestResponse(Subscriber):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e)
|
||||
logger.error(f"Exception processing response: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
finally:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ from pulsar.schema import JsonSchema
|
|||
import asyncio
|
||||
import _pulsar
|
||||
import time
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Subscriber:
|
||||
|
||||
|
|
@ -66,7 +70,7 @@ class Subscriber:
|
|||
if self.metrics:
|
||||
self.metrics.state("running")
|
||||
|
||||
print("Subscriber running...", flush=True)
|
||||
logger.info("Subscriber running...")
|
||||
|
||||
while self.running:
|
||||
|
||||
|
|
@ -78,8 +82,7 @@ class Subscriber:
|
|||
except _pulsar.Timeout:
|
||||
continue
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
print(type(e))
|
||||
logger.error(f"Exception in subscriber receive: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
if self.metrics:
|
||||
|
|
@ -110,7 +113,7 @@ class Subscriber:
|
|||
|
||||
except Exception as e:
|
||||
self.metrics.dropped()
|
||||
print("Q Put:", e, flush=True)
|
||||
logger.warning(f"Failed to put message in queue: {e}")
|
||||
|
||||
for q in self.full.values():
|
||||
try:
|
||||
|
|
@ -121,10 +124,10 @@ class Subscriber:
|
|||
)
|
||||
except Exception as e:
|
||||
self.metrics.dropped()
|
||||
print("Q Put:", e, flush=True)
|
||||
logger.warning(f"Failed to put message in full queue: {e}")
|
||||
|
||||
except Exception as e:
|
||||
print("Subscriber exception:", e, flush=True)
|
||||
logger.error(f"Subscriber exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Tool invocation base class
|
|||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from prometheus_client import Counter
|
||||
|
||||
from .. schema import ToolRequest, ToolResponse, Error
|
||||
from .. exceptions import TooManyRequests
|
||||
from .. base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_concurrency = 1
|
||||
|
||||
class ToolService(FlowProcessor):
|
||||
|
|
@ -91,9 +95,9 @@ class ToolService(FlowProcessor):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in tool service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
await flow.producer["response"].send(
|
||||
ToolResponse(
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Triples query service. Input is a (s, p, o) triple, some values may be
|
|||
null. Output is a list of triples.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .. schema import Value, Triple
|
||||
|
||||
|
|
@ -11,6 +13,9 @@ from . flow_processor import FlowProcessor
|
|||
from . consumer_spec import ConsumerSpec
|
||||
from . producer_spec import ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
class TriplesQueryService(FlowProcessor):
|
||||
|
|
@ -45,21 +50,21 @@ class TriplesQueryService(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.debug(f"Handling triples query request {id}...")
|
||||
|
||||
triples = await self.query_triples(request)
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending triples query response...")
|
||||
r = TriplesQueryResponse(triples=triples, error=None)
|
||||
await flow("response").send(r, properties={"id": id})
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Triples query request completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in triples query service: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.info("Sending error response...")
|
||||
|
||||
r = TriplesQueryResponse(
|
||||
error = Error(
|
||||
|
|
|
|||
|
|
@ -3,10 +3,15 @@
|
|||
Triples store base class
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .. schema import Triples
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
from .. exceptions import TooManyRequests
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-write"
|
||||
|
||||
class TriplesStoreService(FlowProcessor):
|
||||
|
|
@ -38,7 +43,7 @@ class TriplesStoreService(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception in triples store service: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -8,10 +8,14 @@ import boto3
|
|||
import json
|
||||
import os
|
||||
import enum
|
||||
import logging
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "text-completion"
|
||||
|
||||
default_model = 'mistral.mistral-large-2407-v1:0'
|
||||
|
|
@ -145,7 +149,7 @@ class Processor(LlmService):
|
|||
|
||||
def __init__(self, **params):
|
||||
|
||||
print(params)
|
||||
logger.debug(f"Bedrock LLM initialized with params: {params}")
|
||||
|
||||
model = params.get("model", default_model)
|
||||
temperature = params.get("temperature", default_temperature)
|
||||
|
|
@ -197,7 +201,7 @@ class Processor(LlmService):
|
|||
|
||||
self.bedrock = self.session.client(service_name='bedrock-runtime')
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("Bedrock LLM service initialized")
|
||||
|
||||
def determine_variant(self, model):
|
||||
|
||||
|
|
@ -250,9 +254,9 @@ class Processor(LlmService):
|
|||
inputtokens = int(metadata['x-amzn-bedrock-input-token-count'])
|
||||
outputtokens = int(metadata['x-amzn-bedrock-output-token-count'])
|
||||
|
||||
print(outputtext, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM output: {outputtext}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = outputtext,
|
||||
|
|
@ -265,7 +269,7 @@ class Processor(LlmService):
|
|||
|
||||
except self.bedrock.exceptions.ThrottlingException as e:
|
||||
|
||||
print("Hit rate limit:", e, flush=True)
|
||||
logger.warning(f"Hit rate limit: {e}")
|
||||
|
||||
# Leave rate limit retries to the base handler
|
||||
raise TooManyRequests()
|
||||
|
|
@ -274,8 +278,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(type(e))
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Bedrock LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,10 +4,14 @@ Embeddings service, applies an embeddings model selected from HuggingFace.
|
|||
Input is text, output is embeddings vector.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from ... base import EmbeddingsService
|
||||
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "embeddings"
|
||||
|
||||
default_model="all-MiniLM-L6-v2"
|
||||
|
|
@ -22,13 +26,13 @@ class Processor(EmbeddingsService):
|
|||
**params | { "model": model }
|
||||
)
|
||||
|
||||
print("Get model...", flush=True)
|
||||
logger.info(f"Loading HuggingFace embeddings model: {model}")
|
||||
self.embeddings = HuggingFaceEmbeddings(model_name=model)
|
||||
|
||||
async def on_embeddings(self, text):
|
||||
|
||||
embeds = self.embeddings.embed_documents([text])
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Embeddings generation complete")
|
||||
return embeds
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -5,11 +5,15 @@ name + parameters, output is the response, either a string or an object.
|
|||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from mcp.client.streamable_http import streamablehttp_client
|
||||
from mcp import ClientSession
|
||||
|
||||
from ... base import ToolService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "mcp-tool"
|
||||
|
||||
class Service(ToolService):
|
||||
|
|
@ -26,7 +30,7 @@ class Service(ToolService):
|
|||
|
||||
async def on_mcp_config(self, config, version):
|
||||
|
||||
print("Got config version", version)
|
||||
logger.info(f"Got config version {version}")
|
||||
|
||||
if "mcp" not in config: return
|
||||
|
||||
|
|
@ -52,7 +56,7 @@ class Service(ToolService):
|
|||
else:
|
||||
remote_name = name
|
||||
|
||||
print("Invoking", remote_name, "at", url, flush=True)
|
||||
logger.info(f"Invoking {remote_name} at {url}")
|
||||
|
||||
# Connect to a streamable HTTP server
|
||||
async with streamablehttp_client(url) as (
|
||||
|
|
@ -86,13 +90,13 @@ class Service(ToolService):
|
|||
except BaseExceptionGroup as e:
|
||||
|
||||
for child in e.exceptions:
|
||||
print(child)
|
||||
logger.debug(f"Child: {child}")
|
||||
|
||||
raise e.exceptions[0]
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(e)
|
||||
logger.error(f"Error invoking MCP tool: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -164,18 +164,18 @@ class AgentManager:
|
|||
|
||||
async def reason(self, question, history, context):
|
||||
|
||||
print(f"calling reason: {question}", flush=True)
|
||||
logger.debug(f"calling reason: {question}")
|
||||
|
||||
tools = self.tools
|
||||
|
||||
print(f"in reason", flush=True)
|
||||
print(tools, flush=True)
|
||||
logger.debug("in reason")
|
||||
logger.debug(f"tools: {tools}")
|
||||
|
||||
tool_names = ",".join([
|
||||
t for t in self.tools.keys()
|
||||
])
|
||||
|
||||
print("Tool names:", tool_names, flush=True)
|
||||
logger.debug(f"Tool names: {tool_names}")
|
||||
|
||||
variables = {
|
||||
"question": question,
|
||||
|
|
@ -208,14 +208,14 @@ class AgentManager:
|
|||
]
|
||||
}
|
||||
|
||||
print(json.dumps(variables, indent=4), flush=True)
|
||||
logger.debug(f"Variables: {json.dumps(variables, indent=4)}")
|
||||
|
||||
logger.info(f"prompt: {variables}")
|
||||
|
||||
# Get text response from prompt service
|
||||
response_text = await context("prompt-request").agent_react(variables)
|
||||
|
||||
print(f"Response text:\n{response_text}", flush=True)
|
||||
logger.debug(f"Response text:\n{response_text}")
|
||||
|
||||
logger.info(f"response: {response_text}")
|
||||
|
||||
|
|
@ -233,7 +233,6 @@ class AgentManager:
|
|||
async def react(self, question, history, think, observe, context):
|
||||
|
||||
logger.info(f"question: {question}")
|
||||
print(f"question: {question}", flush=True)
|
||||
|
||||
act = await self.reason(
|
||||
question = question,
|
||||
|
|
@ -256,7 +255,7 @@ class AgentManager:
|
|||
else:
|
||||
raise RuntimeError(f"No action for {act.name}!")
|
||||
|
||||
print("TOOL>>>", act, flush=True)
|
||||
logger.debug(f"TOOL>>> {act}")
|
||||
|
||||
resp = await action.implementation(context).invoke(
|
||||
**act.arguments
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import sys
|
|||
import functools
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from ... base import AgentService, TextCompletionClientSpec, PromptClientSpec
|
||||
|
|
@ -81,7 +81,7 @@ class Processor(AgentService):
|
|||
|
||||
async def on_tools_config(self, config, version):
|
||||
|
||||
print("Loading configuration version", version)
|
||||
logger.info(f"Loading configuration version {version}")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -151,13 +151,13 @@ class Processor(AgentService):
|
|||
additional_context=additional
|
||||
)
|
||||
|
||||
print(f"Loaded {len(tools)} tools", flush=True)
|
||||
print("Tool configuration reloaded.", flush=True)
|
||||
logger.info(f"Loaded {len(tools)} tools")
|
||||
logger.info("Tool configuration reloaded.")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("on_tools_config Exception:", e, flush=True)
|
||||
print("Configuration reload failed", flush=True)
|
||||
logger.error(f"on_tools_config Exception: {e}", exc_info=True)
|
||||
logger.error("Configuration reload failed")
|
||||
|
||||
async def agent_request(self, request, respond, next, flow):
|
||||
|
||||
|
|
@ -176,16 +176,16 @@ class Processor(AgentService):
|
|||
else:
|
||||
history = []
|
||||
|
||||
print(f"Question: {request.question}", flush=True)
|
||||
logger.info(f"Question: {request.question}")
|
||||
|
||||
if len(history) >= self.max_iterations:
|
||||
raise RuntimeError("Too many agent iterations")
|
||||
|
||||
print(f"History: {history}", flush=True)
|
||||
logger.debug(f"History: {history}")
|
||||
|
||||
async def think(x):
|
||||
|
||||
print(f"Think: {x}", flush=True)
|
||||
logger.debug(f"Think: {x}")
|
||||
|
||||
r = AgentResponse(
|
||||
answer=None,
|
||||
|
|
@ -198,7 +198,7 @@ class Processor(AgentService):
|
|||
|
||||
async def observe(x):
|
||||
|
||||
print(f"Observe: {x}", flush=True)
|
||||
logger.debug(f"Observe: {x}")
|
||||
|
||||
r = AgentResponse(
|
||||
answer=None,
|
||||
|
|
@ -209,7 +209,7 @@ class Processor(AgentService):
|
|||
|
||||
await respond(r)
|
||||
|
||||
print("Call React", flush=True)
|
||||
logger.debug("Call React")
|
||||
|
||||
act = await self.agent.react(
|
||||
question = request.question,
|
||||
|
|
@ -219,11 +219,11 @@ class Processor(AgentService):
|
|||
context = flow,
|
||||
)
|
||||
|
||||
print(f"Action: {act}", flush=True)
|
||||
logger.debug(f"Action: {act}")
|
||||
|
||||
if isinstance(act, Final):
|
||||
|
||||
print("Send final response...", flush=True)
|
||||
logger.debug("Send final response...")
|
||||
|
||||
if isinstance(act.final, str):
|
||||
f = act.final
|
||||
|
|
@ -238,11 +238,11 @@ class Processor(AgentService):
|
|||
|
||||
await respond(r)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Done.")
|
||||
|
||||
return
|
||||
|
||||
print("Send next...", flush=True)
|
||||
logger.debug("Send next...")
|
||||
|
||||
history.append(act)
|
||||
|
||||
|
|
@ -263,15 +263,15 @@ class Processor(AgentService):
|
|||
|
||||
await next(r)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("React agent processing complete")
|
||||
|
||||
return
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"agent_request Exception: {e}")
|
||||
logger.error(f"agent_request Exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Send error response...")
|
||||
|
||||
r = AgentResponse(
|
||||
error=Error(
|
||||
|
|
|
|||
|
|
@ -1,7 +1,11 @@
|
|||
|
||||
import json
|
||||
import logging
|
||||
from .types import Argument
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# This tool implementation knows how to put a question to the graph RAG
|
||||
# service
|
||||
class KnowledgeQueryImpl:
|
||||
|
|
@ -21,7 +25,7 @@ class KnowledgeQueryImpl:
|
|||
|
||||
async def invoke(self, **arguments):
|
||||
client = self.context("graph-rag-request")
|
||||
print("Graph RAG question...", flush=True)
|
||||
logger.debug("Graph RAG question...")
|
||||
return await client.rag(
|
||||
arguments.get("question")
|
||||
)
|
||||
|
|
@ -44,7 +48,7 @@ class TextCompletionImpl:
|
|||
|
||||
async def invoke(self, **arguments):
|
||||
client = self.context("prompt-request")
|
||||
print("Prompt question...", flush=True)
|
||||
logger.debug("Prompt question...")
|
||||
return await client.question(
|
||||
arguments.get("question")
|
||||
)
|
||||
|
|
@ -67,13 +71,13 @@ class McpToolImpl:
|
|||
|
||||
client = self.context("mcp-tool-request")
|
||||
|
||||
print(f"MCP tool invocation: {self.mcp_tool_id}...", flush=True)
|
||||
logger.debug(f"MCP tool invocation: {self.mcp_tool_id}...")
|
||||
output = await client.invoke(
|
||||
name = self.mcp_tool_id,
|
||||
parameters = arguments, # Pass the actual arguments
|
||||
)
|
||||
|
||||
print(output)
|
||||
logger.debug(f"MCP tool output: {output}")
|
||||
|
||||
if isinstance(output, str):
|
||||
return output
|
||||
|
|
@ -94,7 +98,7 @@ class PromptImpl:
|
|||
|
||||
async def invoke(self, **arguments):
|
||||
client = self.context("prompt-request")
|
||||
print(f"Prompt template invocation: {self.template_id}...", flush=True)
|
||||
logger.debug(f"Prompt template invocation: {self.template_id}...")
|
||||
return await client.prompt(
|
||||
id=self.template_id,
|
||||
variables=arguments
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Simple decoder, accepts text documents on input, outputs chunks from the
|
|||
as text as separate output objects.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from ... schema import TextDocument, Chunk
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "chunker"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -54,12 +58,12 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
print("Chunker initialised", flush=True)
|
||||
logger.info("Recursive chunker initialized")
|
||||
|
||||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Chunking {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Chunking document {v.metadata.id}...")
|
||||
|
||||
texts = self.text_splitter.create_documents(
|
||||
[v.text.decode("utf-8")]
|
||||
|
|
@ -67,7 +71,7 @@ class Processor(FlowProcessor):
|
|||
|
||||
for ix, chunk in enumerate(texts):
|
||||
|
||||
print("Chunk", len(chunk.page_content), flush=True)
|
||||
logger.debug(f"Created chunk of size {len(chunk.page_content)}")
|
||||
|
||||
r = Chunk(
|
||||
metadata=v.metadata,
|
||||
|
|
@ -80,7 +84,7 @@ class Processor(FlowProcessor):
|
|||
|
||||
await flow("output").send(r)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Document chunking complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Simple decoder, accepts text documents on input, outputs chunks from the
|
|||
as text as separate output objects.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from langchain_text_splitters import TokenTextSplitter
|
||||
from prometheus_client import Histogram
|
||||
|
||||
from ... schema import TextDocument, Chunk
|
||||
from ... base import FlowProcessor
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "chunker"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -53,12 +57,12 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
print("Chunker initialised", flush=True)
|
||||
logger.info("Token chunker initialized")
|
||||
|
||||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Chunking {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Chunking document {v.metadata.id}...")
|
||||
|
||||
texts = self.text_splitter.create_documents(
|
||||
[v.text.decode("utf-8")]
|
||||
|
|
@ -66,7 +70,7 @@ class Processor(FlowProcessor):
|
|||
|
||||
for ix, chunk in enumerate(texts):
|
||||
|
||||
print("Chunk", len(chunk.page_content), flush=True)
|
||||
logger.debug(f"Created chunk of size {len(chunk.page_content)}")
|
||||
|
||||
r = Chunk(
|
||||
metadata=v.metadata,
|
||||
|
|
@ -79,7 +83,7 @@ class Processor(FlowProcessor):
|
|||
|
||||
await flow("output").send(r)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Document chunking complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -1,9 +1,14 @@
|
|||
|
||||
import logging
|
||||
|
||||
from trustgraph.schema import ConfigResponse
|
||||
from trustgraph.schema import ConfigValue, Error
|
||||
|
||||
from ... tables.config import ConfigTableStore
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ConfigurationClass:
|
||||
|
||||
async def keys(self):
|
||||
|
|
@ -228,7 +233,7 @@ class Configuration:
|
|||
|
||||
async def handle(self, msg):
|
||||
|
||||
print("Handle message ", msg.operation)
|
||||
logger.debug(f"Handling config message: {msg.operation}")
|
||||
|
||||
if msg.operation == "get":
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,10 @@
|
|||
|
||||
from trustgraph.schema import FlowResponse, Error
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FlowConfig:
|
||||
def __init__(self, config):
|
||||
|
|
@ -41,7 +45,7 @@ class FlowConfig:
|
|||
|
||||
async def handle_delete_class(self, msg):
|
||||
|
||||
print(msg)
|
||||
logger.debug(f"Flow config message: {msg}")
|
||||
|
||||
await self.config.get("flow-classes").delete(msg.class_name)
|
||||
|
||||
|
|
@ -218,7 +222,7 @@ class FlowConfig:
|
|||
|
||||
async def handle(self, msg):
|
||||
|
||||
print("Handle message ", msg.operation)
|
||||
logger.debug(f"Handling flow message: {msg.operation}")
|
||||
|
||||
if msg.operation == "list-classes":
|
||||
resp = await self.handle_list_classes(msg)
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
Config service. Manages system global configuration state
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from trustgraph.schema import Error
|
||||
|
||||
from trustgraph.schema import ConfigRequest, ConfigResponse, ConfigPush
|
||||
|
|
@ -20,6 +22,9 @@ from . flow import FlowConfig
|
|||
from ... base import ProcessorMetrics, ConsumerMetrics, ProducerMetrics
|
||||
from ... base import Consumer, Producer
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# FIXME: How to ensure this doesn't conflict with other usage?
|
||||
keyspace = "config"
|
||||
|
||||
|
|
@ -146,7 +151,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
self.flow = FlowConfig(self.config)
|
||||
|
||||
print("Service initialised.")
|
||||
logger.info("Config service initialized")
|
||||
|
||||
async def start(self):
|
||||
|
||||
|
|
@ -172,7 +177,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
# Race condition, should make sure version & config sync
|
||||
|
||||
print("Pushed version ", await self.config.get_version())
|
||||
logger.info(f"Pushed configuration version {await self.config.get_version()}")
|
||||
|
||||
async def on_config_request(self, msg, consumer, flow):
|
||||
|
||||
|
|
@ -183,7 +188,7 @@ class Processor(AsyncProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling {id}...", flush=True)
|
||||
logger.info(f"Handling config request {id}...")
|
||||
|
||||
resp = await self.config.handle(v)
|
||||
|
||||
|
|
@ -214,7 +219,7 @@ class Processor(AsyncProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling {id}...", flush=True)
|
||||
logger.info(f"Handling flow request {id}...")
|
||||
|
||||
resp = await self.flow.handle(v)
|
||||
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@ from .. base import Publisher
|
|||
import base64
|
||||
import asyncio
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class KnowledgeManager:
|
||||
|
||||
|
|
@ -26,7 +30,7 @@ class KnowledgeManager:
|
|||
|
||||
async def delete_kg_core(self, request, respond):
|
||||
|
||||
print("Deleting core...", flush=True)
|
||||
logger.info("Deleting knowledge core...")
|
||||
|
||||
await self.table_store.delete_kg_core(
|
||||
request.user, request.id
|
||||
|
|
@ -44,7 +48,7 @@ class KnowledgeManager:
|
|||
|
||||
async def get_kg_core(self, request, respond):
|
||||
|
||||
print("Get core...", flush=True)
|
||||
logger.info("Getting knowledge core...")
|
||||
|
||||
async def publish_triples(t):
|
||||
await respond(
|
||||
|
|
@ -82,7 +86,7 @@ class KnowledgeManager:
|
|||
publish_ge,
|
||||
)
|
||||
|
||||
print("Get complete", flush=True)
|
||||
logger.debug("Knowledge core retrieval complete")
|
||||
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
|
|
@ -158,13 +162,13 @@ class KnowledgeManager:
|
|||
|
||||
async def core_loader(self):
|
||||
|
||||
print("Running...", flush=True)
|
||||
logger.info("Knowledge background processor running...")
|
||||
while True:
|
||||
|
||||
print("Wait for next load...", flush=True)
|
||||
logger.debug("Waiting for next load...")
|
||||
request, respond = await self.loader_queue.get()
|
||||
|
||||
print("Loading...", request.id, flush=True)
|
||||
logger.info(f"Loading knowledge: {request.id}")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -204,7 +208,7 @@ class KnowledgeManager:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Knowledge exception: {e}", exc_info=True)
|
||||
await respond(
|
||||
KnowledgeResponse(
|
||||
error = Error(
|
||||
|
|
@ -219,15 +223,15 @@ class KnowledgeManager:
|
|||
)
|
||||
|
||||
|
||||
print("Going to start loading...", flush=True)
|
||||
logger.debug("Starting knowledge loading process...")
|
||||
|
||||
try:
|
||||
|
||||
t_pub = None
|
||||
ge_pub = None
|
||||
|
||||
print(t_q, flush=True)
|
||||
print(ge_q, flush=True)
|
||||
logger.debug(f"Triples queue: {t_q}")
|
||||
logger.debug(f"Graph embeddings queue: {ge_q}")
|
||||
|
||||
t_pub = Publisher(
|
||||
self.flow_config.pulsar_client, t_q,
|
||||
|
|
@ -238,7 +242,7 @@ class KnowledgeManager:
|
|||
schema=GraphEmbeddings
|
||||
)
|
||||
|
||||
print("Start publishers...", flush=True)
|
||||
logger.debug("Starting publishers...")
|
||||
|
||||
await t_pub.start()
|
||||
await ge_pub.start()
|
||||
|
|
@ -246,7 +250,7 @@ class KnowledgeManager:
|
|||
async def publish_triples(t):
|
||||
await t_pub.send(None, t)
|
||||
|
||||
print("Publish triples...", flush=True)
|
||||
logger.debug("Publishing triples...")
|
||||
|
||||
# Remove doc table row
|
||||
await self.table_store.get_triples(
|
||||
|
|
@ -258,7 +262,7 @@ class KnowledgeManager:
|
|||
async def publish_ge(g):
|
||||
await ge_pub.send(None, g)
|
||||
|
||||
print("Publish GEs...", flush=True)
|
||||
logger.debug("Publishing graph embeddings...")
|
||||
|
||||
# Remove doc table row
|
||||
await self.table_store.get_graph_embeddings(
|
||||
|
|
@ -267,19 +271,19 @@ class KnowledgeManager:
|
|||
publish_ge,
|
||||
)
|
||||
|
||||
print("Completed that.", flush=True)
|
||||
logger.debug("Knowledge loading completed")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error(f"Knowledge exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
print("Stopping publishers...", flush=True)
|
||||
logger.debug("Stopping publishers...")
|
||||
|
||||
if t_pub: await t_pub.stop()
|
||||
if ge_pub: await ge_pub.stop()
|
||||
|
||||
print("Done", flush=True)
|
||||
logger.debug("Knowledge processing done")
|
||||
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from functools import partial
|
|||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
|
||||
from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
|
||||
from .. base import ConsumerMetrics, ProducerMetrics
|
||||
|
|
@ -21,6 +22,9 @@ from .. exceptions import RequestError
|
|||
|
||||
from . knowledge import KnowledgeManager
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "knowledge"
|
||||
|
||||
default_knowledge_request_queue = knowledge_request_queue
|
||||
|
|
@ -96,7 +100,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
self.flows = {}
|
||||
|
||||
print("Initialised.", flush=True)
|
||||
logger.info("Knowledge service initialized")
|
||||
|
||||
async def start(self):
|
||||
|
||||
|
|
@ -106,7 +110,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
async def on_knowledge_config(self, config, version):
|
||||
|
||||
print("config version", version)
|
||||
logger.info(f"Configuration version: {version}")
|
||||
|
||||
if "flows" in config:
|
||||
|
||||
|
|
@ -115,14 +119,14 @@ class Processor(AsyncProcessor):
|
|||
for k, v in config["flows"].items()
|
||||
}
|
||||
|
||||
print(self.flows)
|
||||
logger.debug(f"Flows: {self.flows}")
|
||||
|
||||
async def process_request(self, v, id):
|
||||
|
||||
if v.operation is None:
|
||||
raise RequestError("Null operation")
|
||||
|
||||
print("request", v.operation)
|
||||
logger.debug(f"Knowledge request: {v.operation}")
|
||||
|
||||
impls = {
|
||||
"list-kg-cores": self.knowledge.list_kg_cores,
|
||||
|
|
@ -150,7 +154,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.info(f"Handling knowledge input {id}...")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -187,7 +191,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
return
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Knowledge input processing complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -19,6 +19,10 @@ from ... schema import document_ingest_queue, text_ingest_queue
|
|||
from ... log_level import LogLevel
|
||||
from ... base import InputOutputProcessor
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
module = "ocr"
|
||||
|
||||
default_subscriber = module
|
||||
|
|
@ -94,18 +98,18 @@ class Processor(InputOutputProcessor):
|
|||
# Used with Mistral doc upload
|
||||
self.unique_id = str(uuid.uuid4())
|
||||
|
||||
print("PDF inited")
|
||||
logger.info("PDF inited")
|
||||
|
||||
def ocr(self, blob):
|
||||
|
||||
print("Parse PDF...", flush=True)
|
||||
logger.debug("Parse PDF...")
|
||||
|
||||
pdfbuf = BytesIO(blob)
|
||||
pdf = PdfReader(pdfbuf)
|
||||
|
||||
for chunk in chunks(pdf.pages, pages_per_chunk):
|
||||
|
||||
print("Get next pages...", flush=True)
|
||||
logger.debug("Get next pages...")
|
||||
|
||||
part = PdfWriter()
|
||||
for page in chunk:
|
||||
|
|
@ -114,7 +118,7 @@ class Processor(InputOutputProcessor):
|
|||
buf = BytesIO()
|
||||
part.write_stream(buf)
|
||||
|
||||
print("Upload chunk...", flush=True)
|
||||
logger.debug("Upload chunk...")
|
||||
|
||||
uploaded_file = self.mistral.files.upload(
|
||||
file={
|
||||
|
|
@ -128,7 +132,7 @@ class Processor(InputOutputProcessor):
|
|||
file_id=uploaded_file.id, expiry=1
|
||||
)
|
||||
|
||||
print("OCR...", flush=True)
|
||||
logger.debug("OCR...")
|
||||
|
||||
processed = self.mistral.ocr.process(
|
||||
model="mistral-ocr-latest",
|
||||
|
|
@ -139,21 +143,21 @@ class Processor(InputOutputProcessor):
|
|||
}
|
||||
)
|
||||
|
||||
print("Extract markdown...", flush=True)
|
||||
logger.debug("Extract markdown...")
|
||||
|
||||
markdown = get_combined_markdown(processed)
|
||||
|
||||
print("OCR complete.", flush=True)
|
||||
logger.info("OCR complete.")
|
||||
|
||||
return markdown
|
||||
|
||||
async def on_message(self, msg, consumer):
|
||||
|
||||
print("PDF message received")
|
||||
logger.debug("PDF message received")
|
||||
|
||||
v = msg.value()
|
||||
|
||||
print(f"Decoding {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Decoding {v.metadata.id}...")
|
||||
|
||||
markdown = self.ocr(base64.b64decode(v.data))
|
||||
|
||||
|
|
@ -164,7 +168,7 @@ class Processor(InputOutputProcessor):
|
|||
|
||||
await consumer.q.output.send(r)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.info("Done.")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -6,11 +6,15 @@ PDF document as text as separate output objects.
|
|||
|
||||
import tempfile
|
||||
import base64
|
||||
import logging
|
||||
from langchain_community.document_loaders import PyPDFLoader
|
||||
|
||||
from ... schema import Document, TextDocument, Metadata
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "pdf-decoder"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -40,15 +44,15 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
)
|
||||
|
||||
print("PDF inited", flush=True)
|
||||
logger.info("PDF decoder initialized")
|
||||
|
||||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
print("PDF message received", flush=True)
|
||||
logger.debug("PDF message received")
|
||||
|
||||
v = msg.value()
|
||||
|
||||
print(f"Decoding {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Decoding PDF {v.metadata.id}...")
|
||||
|
||||
with tempfile.NamedTemporaryFile(delete_on_close=False) as fp:
|
||||
|
||||
|
|
@ -62,7 +66,7 @@ class Processor(FlowProcessor):
|
|||
|
||||
for ix, page in enumerate(pages):
|
||||
|
||||
print("page", ix, flush=True)
|
||||
logger.debug(f"Processing page {ix}")
|
||||
|
||||
r = TextDocument(
|
||||
metadata=v.metadata,
|
||||
|
|
@ -71,7 +75,7 @@ class Processor(FlowProcessor):
|
|||
|
||||
await flow("output").send(r)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("PDF decoding complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
|
||||
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
|
||||
import time
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocVectors:
|
||||
|
||||
|
|
@ -21,7 +24,7 @@ class DocVectors:
|
|||
|
||||
# Next time to reload - this forces a reload at next window
|
||||
self.next_reload = time.time() + self.reload_time
|
||||
print("Reload at", self.next_reload)
|
||||
logger.debug(f"Reload at {self.next_reload}")
|
||||
|
||||
def init_collection(self, dimension):
|
||||
|
||||
|
|
@ -110,12 +113,12 @@ class DocVectors:
|
|||
}
|
||||
}
|
||||
|
||||
print("Loading...")
|
||||
logger.debug("Loading...")
|
||||
self.client.load_collection(
|
||||
collection_name=coll,
|
||||
)
|
||||
|
||||
print("Searching...")
|
||||
logger.debug("Searching...")
|
||||
|
||||
res = self.client.search(
|
||||
collection_name=coll,
|
||||
|
|
@ -128,7 +131,7 @@ class DocVectors:
|
|||
|
||||
# If reload time has passed, unload collection
|
||||
if time.time() > self.next_reload:
|
||||
print("Unloading, reload at", self.next_reload)
|
||||
logger.debug(f"Unloading, reload at {self.next_reload}")
|
||||
self.client.release_collection(
|
||||
collection_name=coll,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
|
||||
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
|
||||
import time
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EntityVectors:
|
||||
|
||||
|
|
@ -21,7 +24,7 @@ class EntityVectors:
|
|||
|
||||
# Next time to reload - this forces a reload at next window
|
||||
self.next_reload = time.time() + self.reload_time
|
||||
print("Reload at", self.next_reload)
|
||||
logger.debug(f"Reload at {self.next_reload}")
|
||||
|
||||
def init_collection(self, dimension):
|
||||
|
||||
|
|
@ -110,12 +113,12 @@ class EntityVectors:
|
|||
}
|
||||
}
|
||||
|
||||
print("Loading...")
|
||||
logger.debug("Loading...")
|
||||
self.client.load_collection(
|
||||
collection_name=coll,
|
||||
)
|
||||
|
||||
print("Searching...")
|
||||
logger.debug("Searching...")
|
||||
|
||||
res = self.client.search(
|
||||
collection_name=coll,
|
||||
|
|
@ -128,7 +131,7 @@ class EntityVectors:
|
|||
|
||||
# If reload time has passed, unload collection
|
||||
if time.time() > self.next_reload:
|
||||
print("Unloading, reload at", self.next_reload)
|
||||
logger.debug(f"Unloading, reload at {self.next_reload}")
|
||||
self.client.release_collection(
|
||||
collection_name=coll,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
|
||||
from pymilvus import MilvusClient, CollectionSchema, FieldSchema, DataType
|
||||
import time
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class ObjectVectors:
|
||||
|
||||
|
|
@ -21,7 +24,7 @@ class ObjectVectors:
|
|||
|
||||
# Next time to reload - this forces a reload at next window
|
||||
self.next_reload = time.time() + self.reload_time
|
||||
print("Reload at", self.next_reload)
|
||||
logger.debug(f"Reload at {self.next_reload}")
|
||||
|
||||
def init_collection(self, dimension, name):
|
||||
|
||||
|
|
@ -126,12 +129,12 @@ class ObjectVectors:
|
|||
}
|
||||
}
|
||||
|
||||
print("Loading...")
|
||||
logger.debug("Loading...")
|
||||
self.client.load_collection(
|
||||
collection_name=coll,
|
||||
)
|
||||
|
||||
print("Searching...")
|
||||
logger.debug("Searching...")
|
||||
|
||||
res = self.client.search(
|
||||
collection_name=coll,
|
||||
|
|
@ -144,7 +147,7 @@ class ObjectVectors:
|
|||
|
||||
# If reload time has passed, unload collection
|
||||
if time.time() > self.next_reload:
|
||||
print("Unloading, reload at", self.next_reload)
|
||||
logger.debug(f"Unloading, reload at {self.next_reload}")
|
||||
self.client.release_collection(
|
||||
collection_name=coll,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ from ... schema import EmbeddingsRequest, EmbeddingsResponse
|
|||
from ... base import FlowProcessor, RequestResponseSpec, ConsumerSpec
|
||||
from ... base import ProducerSpec
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "document-embeddings"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -52,7 +56,7 @@ class Processor(FlowProcessor):
|
|||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Indexing {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Indexing {v.metadata.id}...")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -79,12 +83,12 @@ class Processor(FlowProcessor):
|
|||
await flow("output").send(r)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error("Exception occurred", exc_info=True)
|
||||
|
||||
# Retry
|
||||
raise e
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.info("Done.")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -4,10 +4,15 @@ Embeddings service, applies an embeddings model using fastembed
|
|||
Input is text, output is embeddings vector.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from ... base import EmbeddingsService
|
||||
|
||||
from fastembed import TextEmbedding
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "embeddings"
|
||||
|
||||
default_model="sentence-transformers/all-MiniLM-L6-v2"
|
||||
|
|
@ -22,7 +27,7 @@ class Processor(EmbeddingsService):
|
|||
**params | { "model": model }
|
||||
)
|
||||
|
||||
print("Get model...", flush=True)
|
||||
logger.info("Loading FastEmbed model...")
|
||||
self.embeddings = TextEmbedding(model_name = model)
|
||||
|
||||
async def on_embeddings(self, text):
|
||||
|
|
|
|||
|
|
@ -11,6 +11,10 @@ from ... schema import EmbeddingsRequest, EmbeddingsResponse
|
|||
from ... base import FlowProcessor, EmbeddingsClientSpec, ConsumerSpec
|
||||
from ... base import ProducerSpec
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "graph-embeddings"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -50,7 +54,7 @@ class Processor(FlowProcessor):
|
|||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Indexing {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Indexing {v.metadata.id}...")
|
||||
|
||||
entities = []
|
||||
|
||||
|
|
@ -77,12 +81,12 @@ class Processor(FlowProcessor):
|
|||
await flow("output").send(r)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e, flush=True)
|
||||
logger.error("Exception occurred", exc_info=True)
|
||||
|
||||
# Retry
|
||||
raise e
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.info("Done.")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@ from trustgraph.schema import encyclopedia_lookup_response_queue
|
|||
from trustgraph.log_level import LogLevel
|
||||
from trustgraph.base import ConsumerProducer
|
||||
import requests
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
module = "wikipedia"
|
||||
|
||||
|
|
@ -46,7 +49,7 @@ class Processor(ConsumerProducer):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling {v.kind} / {v.term}...", flush=True)
|
||||
logger.info(f"Handling {v.kind} / {v.term}...")
|
||||
|
||||
try:
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import re
|
||||
import json
|
||||
import urllib.parse
|
||||
import logging
|
||||
|
||||
from ....schema import Chunk, Triple, Triples, Metadata, Value
|
||||
from ....schema import EntityContext, EntityContexts
|
||||
|
|
@ -12,6 +13,9 @@ from ....base import AgentClientSpec
|
|||
|
||||
from ....template import PromptManager
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "kg-extract-agent"
|
||||
default_concurrency = 1
|
||||
default_template_id = "agent-kg-extract"
|
||||
|
|
@ -74,10 +78,10 @@ class Processor(FlowProcessor):
|
|||
|
||||
async def on_prompt_config(self, config, version):
|
||||
|
||||
print("Loading configuration version", version, flush=True)
|
||||
logger.info(f"Loading configuration version {version}")
|
||||
|
||||
if self.config_key not in config:
|
||||
print(f"No key {self.config_key} in config", flush=True)
|
||||
logger.warning(f"No key {self.config_key} in config")
|
||||
return
|
||||
|
||||
config = config[self.config_key]
|
||||
|
|
@ -86,12 +90,12 @@ class Processor(FlowProcessor):
|
|||
|
||||
self.manager.load_config(config)
|
||||
|
||||
print("Prompt configuration reloaded.", flush=True)
|
||||
logger.info("Prompt configuration reloaded")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Configuration reload failed", flush=True)
|
||||
logger.error(f"Configuration reload exception: {e}", exc_info=True)
|
||||
logger.error("Configuration reload failed")
|
||||
|
||||
def to_uri(self, text):
|
||||
return TRUSTGRAPH_ENTITIES + urllib.parse.quote(text)
|
||||
|
|
@ -142,7 +146,7 @@ class Processor(FlowProcessor):
|
|||
# Extract chunk text
|
||||
chunk_text = v.chunk.decode('utf-8')
|
||||
|
||||
print("Got chunk", flush=True)
|
||||
logger.debug("Processing chunk for agent extraction")
|
||||
|
||||
prompt = self.manager.render(
|
||||
self.template_id,
|
||||
|
|
@ -151,11 +155,11 @@ class Processor(FlowProcessor):
|
|||
}
|
||||
)
|
||||
|
||||
print("Prompt:", prompt, flush=True)
|
||||
logger.debug(f"Agent prompt: {prompt}")
|
||||
|
||||
async def handle(response):
|
||||
|
||||
print("Response:", response, flush=True)
|
||||
logger.debug(f"Agent response: {response}")
|
||||
|
||||
if response.error is not None:
|
||||
if response.error.message:
|
||||
|
|
@ -201,7 +205,7 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing chunk: {e}", flush=True)
|
||||
logger.error(f"Error processing chunk: {e}", exc_info=True)
|
||||
raise
|
||||
|
||||
def process_extraction_data(self, data, metadata):
|
||||
|
|
|
|||
|
|
@ -7,8 +7,12 @@ entity/context definitions for embedding.
|
|||
|
||||
import json
|
||||
import urllib.parse
|
||||
import logging
|
||||
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
from .... schema import EntityContext, EntityContexts
|
||||
from .... schema import PromptRequest, PromptResponse
|
||||
from .... rdf import TRUSTGRAPH_ENTITIES, DEFINITION, RDF_LABEL, SUBJECT_OF
|
||||
|
|
@ -94,11 +98,11 @@ class Processor(FlowProcessor):
|
|||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Indexing {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Extracting definitions from {v.metadata.id}...")
|
||||
|
||||
chunk = v.chunk.decode("utf-8")
|
||||
|
||||
print(chunk, flush=True)
|
||||
logger.debug(f"Processing chunk: {chunk[:200]}...") # Log first 200 chars
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -108,13 +112,13 @@ class Processor(FlowProcessor):
|
|||
text = chunk
|
||||
)
|
||||
|
||||
print("Response", defs, flush=True)
|
||||
logger.debug(f"Definitions response: {defs}")
|
||||
|
||||
if type(defs) != list:
|
||||
raise RuntimeError("Expecting array in prompt response")
|
||||
|
||||
except Exception as e:
|
||||
print("Prompt exception:", e, flush=True)
|
||||
logger.error(f"Prompt exception: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
triples = []
|
||||
|
|
@ -187,9 +191,9 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception: ", e, flush=True)
|
||||
logger.error(f"Definitions extraction exception: {e}", exc_info=True)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Definitions extraction complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -6,8 +6,12 @@ graph edges.
|
|||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import urllib.parse
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... schema import Chunk, Triple, Triples
|
||||
from .... schema import Metadata, Value
|
||||
from .... schema import PromptRequest, PromptResponse
|
||||
|
|
@ -78,11 +82,11 @@ class Processor(FlowProcessor):
|
|||
async def on_message(self, msg, consumer, flow):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Indexing {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Extracting relationships from {v.metadata.id}...")
|
||||
|
||||
chunk = v.chunk.decode("utf-8")
|
||||
|
||||
print(chunk, flush=True)
|
||||
logger.debug(f"Processing chunk: {chunk[:100]}..." if len(chunk) > 100 else f"Processing chunk: {chunk}")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -92,13 +96,13 @@ class Processor(FlowProcessor):
|
|||
text = chunk
|
||||
)
|
||||
|
||||
print("Response", rels, flush=True)
|
||||
logger.debug(f"Prompt response: {rels}")
|
||||
|
||||
if type(rels) != list:
|
||||
raise RuntimeError("Expecting array in prompt response")
|
||||
|
||||
except Exception as e:
|
||||
print("Prompt exception:", e, flush=True)
|
||||
logger.error(f"Prompt exception: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
triples = []
|
||||
|
|
@ -189,9 +193,9 @@ class Processor(FlowProcessor):
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception: ", e, flush=True)
|
||||
logger.error(f"Relationship extraction exception: {e}", exc_info=True)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Relationship extraction complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ get topics which are output as graph edges.
|
|||
|
||||
import urllib.parse
|
||||
import json
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... schema import Chunk, Triple, Triples, Metadata, Value
|
||||
from .... schema import chunk_ingest_queue, triples_store_queue
|
||||
|
|
@ -81,7 +85,7 @@ class Processor(ConsumerProducer):
|
|||
async def handle(self, msg):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Indexing {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Extracting topics from {v.metadata.id}...")
|
||||
|
||||
chunk = v.chunk.decode("utf-8")
|
||||
|
||||
|
|
@ -110,9 +114,9 @@ class Processor(ConsumerProducer):
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception: ", e, flush=True)
|
||||
logger.error(f"Topic extraction exception: {e}", exc_info=True)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Topic extraction complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -6,8 +6,12 @@ out a row of fields. Output as a vector plus object.
|
|||
|
||||
import urllib.parse
|
||||
import os
|
||||
import logging
|
||||
from pulsar.schema import JsonSchema
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... schema import ChunkEmbeddings, Rows, ObjectEmbeddings, Metadata
|
||||
from .... schema import RowSchema, Field
|
||||
from .... schema import chunk_embeddings_ingest_queue, rows_store_queue
|
||||
|
|
@ -75,7 +79,7 @@ class Processor(ConsumerProducer):
|
|||
flds = __class__.parse_fields(params["field"])
|
||||
|
||||
for fld in flds:
|
||||
print(fld)
|
||||
logger.debug(f"Field configuration: {fld}")
|
||||
|
||||
self.primary = None
|
||||
|
||||
|
|
@ -142,7 +146,7 @@ class Processor(ConsumerProducer):
|
|||
async def handle(self, msg):
|
||||
|
||||
v = msg.value()
|
||||
print(f"Indexing {v.metadata.id}...", flush=True)
|
||||
logger.info(f"Extracting rows from {v.metadata.id}...")
|
||||
|
||||
chunk = v.chunk.decode("utf-8")
|
||||
|
||||
|
|
@ -163,12 +167,12 @@ class Processor(ConsumerProducer):
|
|||
)
|
||||
|
||||
for row in rows:
|
||||
print(row)
|
||||
logger.debug(f"Extracted row: {row}")
|
||||
|
||||
except Exception as e:
|
||||
print("Exception: ", e, flush=True)
|
||||
logger.error(f"Row extraction exception: {e}", exc_info=True)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Row extraction complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -18,6 +18,9 @@ import logging
|
|||
import os
|
||||
import base64
|
||||
import uuid
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
import json
|
||||
|
||||
import pulsar
|
||||
|
|
@ -48,7 +51,7 @@ class ConfigReceiver:
|
|||
|
||||
v = msg.value()
|
||||
|
||||
print(f"Config version", v.version)
|
||||
logger.info(f"Config version: {v.version}")
|
||||
|
||||
if "flows" in v.config:
|
||||
|
||||
|
|
@ -68,29 +71,29 @@ class ConfigReceiver:
|
|||
del self.flows[k]
|
||||
|
||||
except Exception as e:
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Config processing exception: {e}", exc_info=True)
|
||||
|
||||
async def start_flow(self, id, flow):
|
||||
|
||||
print("Start flow", id)
|
||||
logger.info(f"Starting flow: {id}")
|
||||
|
||||
for handler in self.flow_handlers:
|
||||
|
||||
try:
|
||||
await handler.start_flow(id, flow)
|
||||
except Exception as e:
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Config processing exception: {e}", exc_info=True)
|
||||
|
||||
async def stop_flow(self, id, flow):
|
||||
|
||||
print("Stop flow", id)
|
||||
logger.info(f"Stopping flow: {id}")
|
||||
|
||||
for handler in self.flow_handlers:
|
||||
|
||||
try:
|
||||
await handler.stop_flow(id, flow)
|
||||
except Exception as e:
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Config processing exception: {e}", exc_info=True)
|
||||
|
||||
async def config_loader(self):
|
||||
|
||||
|
|
@ -111,9 +114,9 @@ class ConfigReceiver:
|
|||
|
||||
await self.config_cons.start()
|
||||
|
||||
print("Waiting...")
|
||||
logger.debug("Waiting for config updates...")
|
||||
|
||||
print("Config consumer done. :/")
|
||||
logger.info("Config consumer finished")
|
||||
|
||||
async def start(self):
|
||||
|
||||
|
|
|
|||
|
|
@ -2,8 +2,12 @@
|
|||
import asyncio
|
||||
import uuid
|
||||
import msgpack
|
||||
import logging
|
||||
from . knowledge import KnowledgeRequestor
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CoreExport:
|
||||
|
||||
def __init__(self, pulsar_client):
|
||||
|
|
@ -84,7 +88,7 @@ class CoreExport:
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e)
|
||||
logger.error(f"Core export exception: {e}", exc_info=True)
|
||||
|
||||
finally:
|
||||
|
||||
|
|
|
|||
|
|
@ -3,8 +3,12 @@ import asyncio
|
|||
import json
|
||||
import uuid
|
||||
import msgpack
|
||||
import logging
|
||||
from . knowledge import KnowledgeRequestor
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CoreImport:
|
||||
|
||||
def __init__(self, pulsar_client):
|
||||
|
|
@ -80,14 +84,14 @@ class CoreImport:
|
|||
await kr.process(msg)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e)
|
||||
logger.error(f"Core import exception: {e}", exc_info=True)
|
||||
await error(str(e))
|
||||
|
||||
finally:
|
||||
|
||||
await kr.stop()
|
||||
|
||||
print("All done.")
|
||||
logger.info("Core import completed")
|
||||
response = await ok()
|
||||
await response.write_eof()
|
||||
|
||||
|
|
|
|||
|
|
@ -2,12 +2,16 @@
|
|||
import asyncio
|
||||
import queue
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
from ... schema import DocumentEmbeddings
|
||||
from ... base import Subscriber
|
||||
|
||||
from . serialize import serialize_document_embeddings
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentEmbeddingsExport:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -55,7 +59,7 @@ class DocumentEmbeddingsExport:
|
|||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}", flush=True)
|
||||
logger.error(f"Exception: {str(e)}", exc_info=True)
|
||||
break
|
||||
|
||||
await subs.unsubscribe_all(id)
|
||||
|
|
|
|||
|
|
@ -1,11 +1,15 @@
|
|||
|
||||
import base64
|
||||
import logging
|
||||
|
||||
from ... schema import Document, Metadata
|
||||
from ... messaging import TranslatorRegistry
|
||||
|
||||
from . sender import ServiceSender
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentLoad(ServiceSender):
|
||||
def __init__(self, pulsar_client, queue):
|
||||
|
||||
|
|
@ -18,6 +22,6 @@ class DocumentLoad(ServiceSender):
|
|||
self.translator = TranslatorRegistry.get_request_translator("document")
|
||||
|
||||
def to_request(self, body):
|
||||
print("Document received")
|
||||
logger.info("Document received")
|
||||
return self.translator.to_pulsar(body)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,12 +2,16 @@
|
|||
import asyncio
|
||||
import queue
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
from ... schema import EntityContexts
|
||||
from ... base import Subscriber
|
||||
|
||||
from . serialize import serialize_entity_contexts
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class EntityContextsExport:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -55,7 +59,7 @@ class EntityContextsExport:
|
|||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}", flush=True)
|
||||
logger.error(f"Exception: {str(e)}", exc_info=True)
|
||||
break
|
||||
|
||||
await subs.unsubscribe_all(id)
|
||||
|
|
|
|||
|
|
@ -2,12 +2,16 @@
|
|||
import asyncio
|
||||
import queue
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
from ... schema import GraphEmbeddings
|
||||
from ... base import Subscriber
|
||||
|
||||
from . serialize import serialize_graph_embeddings
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class GraphEmbeddingsExport:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -55,7 +59,7 @@ class GraphEmbeddingsExport:
|
|||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}", flush=True)
|
||||
logger.error(f"Exception: {str(e)}", exc_info=True)
|
||||
break
|
||||
|
||||
await subs.unsubscribe_all(id)
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@
|
|||
import asyncio
|
||||
from aiohttp import web
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from . config import ConfigRequestor
|
||||
from . flow import FlowRequestor
|
||||
|
|
@ -92,12 +96,12 @@ class DispatcherManager:
|
|||
self.dispatchers = {}
|
||||
|
||||
async def start_flow(self, id, flow):
|
||||
print("Start flow", id)
|
||||
logger.info(f"Starting flow {id}")
|
||||
self.flows[id] = flow
|
||||
return
|
||||
|
||||
async def stop_flow(self, id, flow):
|
||||
print("Stop flow", id)
|
||||
logger.info(f"Stopping flow {id}")
|
||||
del self.flows[id]
|
||||
return
|
||||
|
||||
|
|
|
|||
|
|
@ -2,6 +2,10 @@
|
|||
import asyncio
|
||||
import queue
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
MAX_OUTSTANDING_REQUESTS = 15
|
||||
WORKER_CLOSE_WAIT = 0.01
|
||||
|
|
@ -46,7 +50,7 @@ class Mux:
|
|||
))
|
||||
|
||||
except Exception as e:
|
||||
print("receive exception:", str(e), flush=True)
|
||||
logger.error(f"Receive exception: {str(e)}", exc_info=True)
|
||||
await self.ws.send_json({"error": str(e)})
|
||||
|
||||
async def maybe_tidy_workers(self, workers):
|
||||
|
|
@ -138,7 +142,7 @@ class Mux:
|
|||
|
||||
except Exception as e:
|
||||
# This is an internal working error, may not be recoverable
|
||||
print("run prepare exception:", e)
|
||||
logger.error(f"Run prepare exception: {e}", exc_info=True)
|
||||
await self.ws.send_json({"id": id, "error": str(e)})
|
||||
self.running.stop()
|
||||
|
||||
|
|
@ -155,7 +159,7 @@ class Mux:
|
|||
)
|
||||
|
||||
except Exception as e:
|
||||
print("Exception2:", e)
|
||||
logger.error(f"Exception in mux: {e}", exc_info=True)
|
||||
await self.ws.send_json({"error": str(e)})
|
||||
|
||||
self.running.stop()
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ class ServiceRequestor:
|
|||
q.get(), timeout=self.timeout
|
||||
)
|
||||
except Exception as e:
|
||||
print("Exception", e)
|
||||
logger.error(f"Request timeout exception: {e}", exc_info=True)
|
||||
raise RuntimeError("Timeout")
|
||||
|
||||
if resp.error:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,15 @@
|
|||
|
||||
import base64
|
||||
import logging
|
||||
|
||||
from ... schema import TextDocument, Metadata
|
||||
from ... messaging import TranslatorRegistry
|
||||
|
||||
from . sender import ServiceSender
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TextLoad(ServiceSender):
|
||||
def __init__(self, pulsar_client, queue):
|
||||
|
||||
|
|
@ -18,6 +22,6 @@ class TextLoad(ServiceSender):
|
|||
self.translator = TranslatorRegistry.get_request_translator("text-document")
|
||||
|
||||
def to_request(self, body):
|
||||
print("Text document received")
|
||||
logger.info("Text document received")
|
||||
return self.translator.to_pulsar(body)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,12 +2,16 @@
|
|||
import asyncio
|
||||
import queue
|
||||
import uuid
|
||||
import logging
|
||||
|
||||
from ... schema import Triples
|
||||
from ... base import Subscriber
|
||||
|
||||
from . serialize import serialize_triples
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class TriplesExport:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -55,7 +59,7 @@ class TriplesExport:
|
|||
continue
|
||||
|
||||
except Exception as e:
|
||||
print(f"Exception: {str(e)}", flush=True)
|
||||
logger.error(f"Exception: {str(e)}", exc_info=True)
|
||||
break
|
||||
|
||||
await subs.unsubscribe_all(id)
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class ConstantEndpoint:
|
|||
|
||||
async def handle(self, request):
|
||||
|
||||
print(request.path, "...")
|
||||
logger.debug(f"Processing request: {request.path}")
|
||||
|
||||
try:
|
||||
ht = request.headers["Authorization"]
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class MetricsEndpoint:
|
|||
|
||||
async def handle(self, request):
|
||||
|
||||
print(request.path, "...")
|
||||
logger.debug(f"Processing metrics request: {request.path}")
|
||||
|
||||
try:
|
||||
ht = request.headers["Authorization"]
|
||||
|
|
|
|||
|
|
@ -74,24 +74,24 @@ class SocketEndpoint:
|
|||
self.listener(ws, dispatcher, running)
|
||||
)
|
||||
|
||||
print("Created taskgroup, waiting...")
|
||||
logger.debug("Created task group, waiting for completion...")
|
||||
|
||||
# Wait for threads to complete
|
||||
|
||||
print("Task group closed")
|
||||
logger.debug("Task group closed")
|
||||
|
||||
# Finally?
|
||||
await dispatcher.destroy()
|
||||
|
||||
except ExceptionGroup as e:
|
||||
|
||||
print("Exception group:", flush=True)
|
||||
logger.error("Exception group occurred:", exc_info=True)
|
||||
|
||||
for se in e.exceptions:
|
||||
print(" Type:", type(se), flush=True)
|
||||
print(f" Exception: {se}", flush=True)
|
||||
logger.error(f" Exception type: {type(se)}")
|
||||
logger.error(f" Exception: {se}")
|
||||
except Exception as e:
|
||||
print("Socket exception:", e, flush=True)
|
||||
logger.error(f"Socket exception: {e}", exc_info=True)
|
||||
|
||||
await ws.close()
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class StreamEndpoint:
|
|||
|
||||
async def handle(self, request):
|
||||
|
||||
print(request.path, "...")
|
||||
logger.debug(f"Processing request: {request.path}")
|
||||
|
||||
try:
|
||||
ht = request.headers["Authorization"]
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ class VariableEndpoint:
|
|||
|
||||
async def handle(self, request):
|
||||
|
||||
print(request.path, "...")
|
||||
logger.debug(f"Processing request: {request.path}")
|
||||
|
||||
try:
|
||||
ht = request.headers["Authorization"]
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ from .. exceptions import RequestError
|
|||
from minio import Minio
|
||||
import time
|
||||
import io
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class BlobStore:
|
||||
|
||||
|
|
@ -23,7 +27,7 @@ class BlobStore:
|
|||
|
||||
self.bucket_name = bucket_name
|
||||
|
||||
print("Connected to minio", flush=True)
|
||||
logger.info("Connected to MinIO")
|
||||
|
||||
self.ensure_bucket()
|
||||
|
||||
|
|
@ -33,9 +37,9 @@ class BlobStore:
|
|||
found = self.minio.bucket_exists(self.bucket_name)
|
||||
if not found:
|
||||
self.minio.make_bucket(self.bucket_name)
|
||||
print("Created bucket", self.bucket_name, flush=True)
|
||||
logger.info(f"Created bucket {self.bucket_name}")
|
||||
else:
|
||||
print("Bucket", self.bucket_name, "already exists", flush=True)
|
||||
logger.debug(f"Bucket {self.bucket_name} already exists")
|
||||
|
||||
async def add(self, object_id, blob, kind):
|
||||
|
||||
|
|
@ -48,7 +52,7 @@ class BlobStore:
|
|||
content_type = kind,
|
||||
)
|
||||
|
||||
print("Add blob complete", flush=True)
|
||||
logger.debug("Add blob complete")
|
||||
|
||||
async def remove(self, object_id):
|
||||
|
||||
|
|
@ -58,7 +62,7 @@ class BlobStore:
|
|||
object_name = "doc/" + str(object_id),
|
||||
)
|
||||
|
||||
print("Remove blob complete", flush=True)
|
||||
logger.debug("Remove blob complete")
|
||||
|
||||
|
||||
async def get(self, object_id):
|
||||
|
|
|
|||
|
|
@ -5,9 +5,13 @@ from .. exceptions import RequestError
|
|||
from .. tables.library import LibraryTableStore
|
||||
from . blob_store import BlobStore
|
||||
import base64
|
||||
import logging
|
||||
|
||||
import uuid
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class Librarian:
|
||||
|
||||
def __init__(
|
||||
|
|
@ -45,20 +49,20 @@ class Librarian:
|
|||
# Create object ID for blob
|
||||
object_id = uuid.uuid4()
|
||||
|
||||
print("Add blob...")
|
||||
logger.debug("Adding blob...")
|
||||
|
||||
await self.blob_store.add(
|
||||
object_id, base64.b64decode(request.content),
|
||||
request.document_metadata.kind
|
||||
)
|
||||
|
||||
print("Add table...")
|
||||
logger.debug("Adding to table...")
|
||||
|
||||
await self.table_store.add_document(
|
||||
request.document_metadata, object_id
|
||||
)
|
||||
|
||||
print("Add complete", flush=True)
|
||||
logger.debug("Add complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
@ -70,7 +74,7 @@ class Librarian:
|
|||
|
||||
async def remove_document(self, request):
|
||||
|
||||
print("Removing doc...")
|
||||
logger.debug("Removing document...")
|
||||
|
||||
if not await self.table_store.document_exists(
|
||||
request.user,
|
||||
|
|
@ -92,7 +96,7 @@ class Librarian:
|
|||
request.document_id
|
||||
)
|
||||
|
||||
print("Remove complete", flush=True)
|
||||
logger.debug("Remove complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
@ -104,7 +108,7 @@ class Librarian:
|
|||
|
||||
async def update_document(self, request):
|
||||
|
||||
print("Updating doc...")
|
||||
logger.debug("Updating document...")
|
||||
|
||||
# You can't update the document ID, user or kind.
|
||||
|
||||
|
|
@ -116,7 +120,7 @@ class Librarian:
|
|||
|
||||
await self.table_store.update_document(request.document_metadata)
|
||||
|
||||
print("Update complete", flush=True)
|
||||
logger.debug("Update complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
@ -128,14 +132,14 @@ class Librarian:
|
|||
|
||||
async def get_document_metadata(self, request):
|
||||
|
||||
print("Get doc...")
|
||||
logger.debug("Getting document metadata...")
|
||||
|
||||
doc = await self.table_store.get_document(
|
||||
request.user,
|
||||
request.document_id
|
||||
)
|
||||
|
||||
print("Get complete", flush=True)
|
||||
logger.debug("Get complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
@ -147,7 +151,7 @@ class Librarian:
|
|||
|
||||
async def get_document_content(self, request):
|
||||
|
||||
print("Get doc content...")
|
||||
logger.debug("Getting document content...")
|
||||
|
||||
object_id = await self.table_store.get_document_object_id(
|
||||
request.user,
|
||||
|
|
@ -158,7 +162,7 @@ class Librarian:
|
|||
object_id
|
||||
)
|
||||
|
||||
print("Get complete", flush=True)
|
||||
logger.debug("Get complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
@ -170,7 +174,7 @@ class Librarian:
|
|||
|
||||
async def add_processing(self, request):
|
||||
|
||||
print("Add processing")
|
||||
logger.debug("Adding processing metadata...")
|
||||
|
||||
if await self.table_store.processing_exists(
|
||||
request.processing_metadata.user,
|
||||
|
|
@ -192,13 +196,13 @@ class Librarian:
|
|||
object_id
|
||||
)
|
||||
|
||||
print("Got content")
|
||||
logger.debug("Retrieved content")
|
||||
|
||||
print("Add processing...")
|
||||
logger.debug("Adding processing to table...")
|
||||
|
||||
await self.table_store.add_processing(request.processing_metadata)
|
||||
|
||||
print("Invoke document processing...")
|
||||
logger.debug("Invoking document processing...")
|
||||
|
||||
await self.load_document(
|
||||
document = doc,
|
||||
|
|
@ -206,7 +210,7 @@ class Librarian:
|
|||
content = content,
|
||||
)
|
||||
|
||||
print("Add complete", flush=True)
|
||||
logger.debug("Add complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
@ -218,7 +222,7 @@ class Librarian:
|
|||
|
||||
async def remove_processing(self, request):
|
||||
|
||||
print("Removing processing...")
|
||||
logger.debug("Removing processing metadata...")
|
||||
|
||||
if not await self.table_store.processing_exists(
|
||||
request.user,
|
||||
|
|
@ -232,7 +236,7 @@ class Librarian:
|
|||
request.processing_id
|
||||
)
|
||||
|
||||
print("Remove complete", flush=True)
|
||||
logger.debug("Remove complete")
|
||||
|
||||
return LibrarianResponse(
|
||||
error = None,
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from functools import partial
|
|||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
|
||||
from .. base import AsyncProcessor, Consumer, Producer, Publisher, Subscriber
|
||||
from .. base import ConsumerMetrics, ProducerMetrics
|
||||
|
|
@ -21,6 +22,9 @@ from .. exceptions import RequestError
|
|||
|
||||
from . librarian import Librarian
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "librarian"
|
||||
|
||||
default_librarian_request_queue = librarian_request_queue
|
||||
|
|
@ -119,7 +123,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
self.flows = {}
|
||||
|
||||
print("Initialised.", flush=True)
|
||||
logger.info("Librarian service initialized")
|
||||
|
||||
async def start(self):
|
||||
|
||||
|
|
@ -129,7 +133,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
async def on_librarian_config(self, config, version):
|
||||
|
||||
print("config version", version)
|
||||
logger.info(f"Configuration version: {version}")
|
||||
|
||||
if "flows" in config:
|
||||
|
||||
|
|
@ -138,7 +142,7 @@ class Processor(AsyncProcessor):
|
|||
for k, v in config["flows"].items()
|
||||
}
|
||||
|
||||
print(self.flows)
|
||||
logger.debug(f"Flows: {self.flows}")
|
||||
|
||||
def __del__(self):
|
||||
|
||||
|
|
@ -146,9 +150,9 @@ class Processor(AsyncProcessor):
|
|||
|
||||
async def load_document(self, document, processing, content):
|
||||
|
||||
print("Ready for processing...")
|
||||
logger.debug("Ready for document processing...")
|
||||
|
||||
print(document, processing, len(content))
|
||||
logger.debug(f"Document: {document}, processing: {processing}, content length: {len(content)}")
|
||||
|
||||
if processing.flow not in self.flows:
|
||||
raise RuntimeError("Invalid flow ID")
|
||||
|
|
@ -188,7 +192,7 @@ class Processor(AsyncProcessor):
|
|||
)
|
||||
schema = Document
|
||||
|
||||
print(f"Submit on queue {q}...")
|
||||
logger.debug(f"Submitting to queue {q}...")
|
||||
|
||||
pub = Publisher(
|
||||
self.pulsar_client, q, schema=schema
|
||||
|
|
@ -203,14 +207,14 @@ class Processor(AsyncProcessor):
|
|||
|
||||
await pub.stop()
|
||||
|
||||
print("Document submitted")
|
||||
logger.debug("Document submitted")
|
||||
|
||||
async def process_request(self, v):
|
||||
|
||||
if v.operation is None:
|
||||
raise RequestError("Null operation")
|
||||
|
||||
print("request", v.operation)
|
||||
logger.debug(f"Librarian request: {v.operation}")
|
||||
|
||||
impls = {
|
||||
"add-document": self.librarian.add_document,
|
||||
|
|
@ -237,7 +241,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.info(f"Handling librarian input {id}...")
|
||||
|
||||
try:
|
||||
|
||||
|
|
@ -276,7 +280,7 @@ class Processor(AsyncProcessor):
|
|||
|
||||
return
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Librarian input processing complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -4,10 +4,14 @@ Simple token counter for each LLM response.
|
|||
|
||||
from prometheus_client import Counter
|
||||
import json
|
||||
import logging
|
||||
|
||||
from .. schema import TextCompletionResponse, Error
|
||||
from .. base import FlowProcessor, ConsumerSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "metering"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -59,10 +63,10 @@ class Processor(FlowProcessor):
|
|||
# Load token costs from the config service
|
||||
async def on_cost_config(self, config, version):
|
||||
|
||||
print("Loading configuration version", version)
|
||||
logger.info(f"Loading metering configuration version {version}")
|
||||
|
||||
if self.config_key not in config:
|
||||
print(f"No key {self.config_key} in config", flush=True)
|
||||
logger.warning(f"No key {self.config_key} in config")
|
||||
return
|
||||
|
||||
config = config[self.config_key]
|
||||
|
|
@ -102,9 +106,9 @@ class Processor(FlowProcessor):
|
|||
__class__.input_cost_metric.inc(cost_in)
|
||||
__class__.output_cost_metric.inc(cost_out)
|
||||
|
||||
print(f"Input Tokens: {num_in}", flush=True)
|
||||
print(f"Output Tokens: {num_out}", flush=True)
|
||||
print(f"Cost for call: ${cost_per_call}", flush=True)
|
||||
logger.info(f"Input Tokens: {num_in}")
|
||||
logger.info(f"Output Tokens: {num_out}")
|
||||
logger.info(f"Cost for call: ${cost_per_call}")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -8,10 +8,14 @@ import requests
|
|||
import json
|
||||
from prometheus_client import Histogram
|
||||
import os
|
||||
import logging
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "text-completion"
|
||||
|
||||
default_temperature = 0.0
|
||||
|
|
@ -111,11 +115,11 @@ class Processor(LlmService):
|
|||
inputtokens = response['usage']['prompt_tokens']
|
||||
outputtokens = response['usage']['completion_tokens']
|
||||
|
||||
print(resp, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Sending response...")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp,
|
||||
|
|
@ -128,7 +132,7 @@ class Processor(LlmService):
|
|||
|
||||
except TooManyRequests:
|
||||
|
||||
print("Rate limit...")
|
||||
logger.warning("Rate limit exceeded")
|
||||
|
||||
# Leave rate limit retries to the base handler
|
||||
raise TooManyRequests()
|
||||
|
|
@ -137,10 +141,10 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Azure LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Azure LLM processing complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@ import json
|
|||
from prometheus_client import Histogram
|
||||
from openai import AzureOpenAI, RateLimitError
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -84,10 +88,10 @@ class Processor(LlmService):
|
|||
|
||||
inputtokens = resp.usage.prompt_tokens
|
||||
outputtokens = resp.usage.completion_tokens
|
||||
print(resp.choices[0].message.content, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
print("Send response...", flush=True)
|
||||
logger.debug(f"LLM response: {resp.choices[0].message.content}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
logger.debug("Sending response...")
|
||||
|
||||
r = LlmResult(
|
||||
text = resp.choices[0].message.content,
|
||||
|
|
@ -100,7 +104,7 @@ class Processor(LlmService):
|
|||
|
||||
except RateLimitError:
|
||||
|
||||
print("Send rate limit response...", flush=True)
|
||||
logger.warning("Rate limit exceeded")
|
||||
|
||||
# Leave rate limit retries to the base handler
|
||||
raise TooManyRequests()
|
||||
|
|
@ -108,10 +112,10 @@ class Processor(LlmService):
|
|||
except Exception as e:
|
||||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Azure OpenAI LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Azure OpenAI LLM processing complete")
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
|
|
|
|||
|
|
@ -6,10 +6,14 @@ Input is prompt, output is response.
|
|||
|
||||
import anthropic
|
||||
import os
|
||||
import logging
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "text-completion"
|
||||
|
||||
default_model = 'claude-3-5-sonnet-20240620'
|
||||
|
|
@ -42,7 +46,7 @@ class Processor(LlmService):
|
|||
self.temperature = temperature
|
||||
self.max_output = max_output
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("Claude LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -69,9 +73,9 @@ class Processor(LlmService):
|
|||
resp = response.content[0].text
|
||||
inputtokens = response.usage.input_tokens
|
||||
outputtokens = response.usage.output_tokens
|
||||
print(resp, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp,
|
||||
|
|
@ -91,7 +95,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Claude LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -7,6 +7,10 @@ Input is prompt, output is response.
|
|||
import cohere
|
||||
from prometheus_client import Histogram
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -39,7 +43,7 @@ class Processor(LlmService):
|
|||
self.temperature = temperature
|
||||
self.cohere = cohere.Client(api_key=api_key)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("Cohere LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -59,9 +63,9 @@ class Processor(LlmService):
|
|||
inputtokens = int(output.meta.billed_units.input_tokens)
|
||||
outputtokens = int(output.meta.billed_units.output_tokens)
|
||||
|
||||
print(resp, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp,
|
||||
|
|
@ -83,7 +87,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Cohere LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -17,6 +17,10 @@ from google.genai import types
|
|||
from google.genai.types import HarmCategory, HarmBlockThreshold
|
||||
from google.api_core.exceptions import ResourceExhausted
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -77,7 +81,7 @@ class Processor(LlmService):
|
|||
# HarmCategory.HARM_CATEGORY_CIVIC_INTEGRITY: block_level,
|
||||
]
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("GoogleAIStudio LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -102,9 +106,9 @@ class Processor(LlmService):
|
|||
resp = response.text
|
||||
inputtokens = int(response.usage_metadata.prompt_token_count)
|
||||
outputtokens = int(response.usage_metadata.candidates_token_count)
|
||||
print(resp, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp,
|
||||
|
|
@ -117,7 +121,7 @@ class Processor(LlmService):
|
|||
|
||||
except ResourceExhausted as e:
|
||||
|
||||
print("Hit rate limit:", e, flush=True)
|
||||
logger.warning("Rate limit exceeded")
|
||||
|
||||
# Leave rate limit retries to the default handler
|
||||
raise TooManyRequests()
|
||||
|
|
@ -126,8 +130,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(type(e), flush=True)
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"GoogleAIStudio LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ Input is prompt, output is response.
|
|||
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -44,7 +48,7 @@ class Processor(LlmService):
|
|||
api_key = "sk-no-key-required",
|
||||
)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("Llamafile LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -70,9 +74,9 @@ class Processor(LlmService):
|
|||
inputtokens = resp.usage.prompt_tokens
|
||||
outputtokens = resp.usage.completion_tokens
|
||||
|
||||
print(resp.choices[0].message.content, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp.choices[0].message.content}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp.choices[0].message.content,
|
||||
|
|
@ -87,7 +91,7 @@ class Processor(LlmService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Llamafile LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ Input is prompt, output is response.
|
|||
|
||||
from openai import OpenAI
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -44,7 +48,7 @@ class Processor(LlmService):
|
|||
api_key = "sk-no-key-required",
|
||||
)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("LMStudio LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -52,7 +56,7 @@ class Processor(LlmService):
|
|||
|
||||
try:
|
||||
|
||||
print(prompt)
|
||||
logger.debug(f"Prompt: {prompt}")
|
||||
|
||||
resp = self.openai.chat.completions.create(
|
||||
model=self.model,
|
||||
|
|
@ -69,14 +73,14 @@ class Processor(LlmService):
|
|||
#}
|
||||
)
|
||||
|
||||
print(resp)
|
||||
logger.debug(f"Full response: {resp}")
|
||||
|
||||
inputtokens = resp.usage.prompt_tokens
|
||||
outputtokens = resp.usage.completion_tokens
|
||||
|
||||
print(resp.choices[0].message.content, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp.choices[0].message.content}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp.choices[0].message.content,
|
||||
|
|
@ -91,7 +95,7 @@ class Processor(LlmService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"LMStudio LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ Input is prompt, output is response.
|
|||
|
||||
from mistralai import Mistral
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -42,7 +46,7 @@ class Processor(LlmService):
|
|||
self.max_output = max_output
|
||||
self.mistral = Mistral(api_key=api_key)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("Mistral LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -75,9 +79,9 @@ class Processor(LlmService):
|
|||
|
||||
inputtokens = resp.usage.prompt_tokens
|
||||
outputtokens = resp.usage.completion_tokens
|
||||
print(resp.choices[0].message.content, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp.choices[0].message.content}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp.choices[0].message.content,
|
||||
|
|
@ -105,7 +109,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Mistral LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ Input is prompt, output is response.
|
|||
|
||||
from ollama import Client
|
||||
import os
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -41,8 +45,8 @@ class Processor(LlmService):
|
|||
response = self.llm.generate(self.model, prompt)
|
||||
|
||||
response_text = response['response']
|
||||
print("Send response...", flush=True)
|
||||
print(response_text, flush=True)
|
||||
logger.debug("Sending response...")
|
||||
logger.debug(f"LLM response: {response_text}")
|
||||
|
||||
inputtokens = int(response['prompt_eval_count'])
|
||||
outputtokens = int(response['eval_count'])
|
||||
|
|
@ -60,7 +64,7 @@ class Processor(LlmService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Ollama LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,10 +6,14 @@ Input is prompt, output is response.
|
|||
|
||||
from openai import OpenAI, RateLimitError
|
||||
import os
|
||||
import logging
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "text-completion"
|
||||
|
||||
default_model = 'gpt-3.5-turbo'
|
||||
|
|
@ -52,7 +56,7 @@ class Processor(LlmService):
|
|||
else:
|
||||
self.openai = OpenAI(api_key=api_key)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info("OpenAI LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -85,9 +89,9 @@ class Processor(LlmService):
|
|||
|
||||
inputtokens = resp.usage.prompt_tokens
|
||||
outputtokens = resp.usage.completion_tokens
|
||||
print(resp.choices[0].message.content, flush=True)
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
logger.debug(f"LLM response: {resp.choices[0].message.content}")
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = resp.choices[0].message.content,
|
||||
|
|
@ -109,7 +113,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {type(e)} {e}")
|
||||
logger.error(f"OpenAI LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ Input is prompt, output is response.
|
|||
|
||||
import os
|
||||
import aiohttp
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -41,9 +45,8 @@ class Processor(LlmService):
|
|||
|
||||
self.session = aiohttp.ClientSession()
|
||||
|
||||
print("Using TGI service at", base_url)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info(f"Using TGI service at {base_url}")
|
||||
logger.info("TGI LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -85,9 +88,9 @@ class Processor(LlmService):
|
|||
inputtokens = resp["usage"]["prompt_tokens"]
|
||||
outputtokens = resp["usage"]["completion_tokens"]
|
||||
ans = resp["choices"][0]["message"]["content"]
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
print(ans, flush=True)
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
logger.debug(f"LLM response: {ans}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = ans,
|
||||
|
|
@ -104,7 +107,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {type(e)} {e}")
|
||||
logger.error(f"TGI LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ Input is prompt, output is response.
|
|||
|
||||
import os
|
||||
import aiohttp
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from .... exceptions import TooManyRequests
|
||||
from .... base import LlmService, LlmResult
|
||||
|
|
@ -45,9 +49,8 @@ class Processor(LlmService):
|
|||
|
||||
self.session = aiohttp.ClientSession()
|
||||
|
||||
print("Using vLLM service at", base_url)
|
||||
|
||||
print("Initialised", flush=True)
|
||||
logger.info(f"Using vLLM service at {base_url}")
|
||||
logger.info("vLLM LLM service initialized")
|
||||
|
||||
async def generate_content(self, system, prompt):
|
||||
|
||||
|
|
@ -80,9 +83,9 @@ class Processor(LlmService):
|
|||
inputtokens = resp["usage"]["prompt_tokens"]
|
||||
outputtokens = resp["usage"]["completion_tokens"]
|
||||
ans = resp["choices"][0]["text"]
|
||||
print(f"Input Tokens: {inputtokens}", flush=True)
|
||||
print(f"Output Tokens: {outputtokens}", flush=True)
|
||||
print(ans, flush=True)
|
||||
logger.info(f"Input Tokens: {inputtokens}")
|
||||
logger.info(f"Output Tokens: {outputtokens}")
|
||||
logger.debug(f"LLM response: {ans}")
|
||||
|
||||
resp = LlmResult(
|
||||
text = ans,
|
||||
|
|
@ -99,7 +102,7 @@ class Processor(LlmService):
|
|||
|
||||
# Apart from rate limits, treat all exceptions as unrecoverable
|
||||
|
||||
print(f"Exception: {type(e)} {e}")
|
||||
logger.error(f"vLLM LLM exception ({type(e).__name__}): {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -11,9 +11,13 @@ import importlib
|
|||
|
||||
from .. log_level import LogLevel
|
||||
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def fn(module_name, class_name, params, w):
|
||||
|
||||
print(f"Starting {module_name}...")
|
||||
logger.info(f"Starting {module_name}...")
|
||||
|
||||
if "log_level" in params:
|
||||
params["log_level"] = LogLevel(params["log_level"])
|
||||
|
|
@ -22,7 +26,7 @@ def fn(module_name, class_name, params, w):
|
|||
|
||||
try:
|
||||
|
||||
print(f"Starting {class_name} using {module_name}...")
|
||||
logger.info(f"Starting {class_name} using {module_name}...")
|
||||
|
||||
module = importlib.import_module(module_name)
|
||||
class_object = getattr(module, class_name)
|
||||
|
|
@ -30,16 +34,16 @@ def fn(module_name, class_name, params, w):
|
|||
processor = class_object(**params)
|
||||
|
||||
processor.run()
|
||||
print(f"{module_name} stopped.")
|
||||
logger.info(f"{module_name} stopped.")
|
||||
|
||||
except Exception as e:
|
||||
print("Exception:", e)
|
||||
logger.error("Exception occurred", exc_info=True)
|
||||
|
||||
print("Restarting in 10...")
|
||||
logger.info("Restarting in 10...")
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
print("Closing")
|
||||
logger.info("Closing")
|
||||
w.close()
|
||||
|
||||
class Processing:
|
||||
|
|
@ -108,7 +112,7 @@ class Processing:
|
|||
readers.remove(r)
|
||||
wait_for -= 1
|
||||
|
||||
print("All processes exited")
|
||||
logger.info("All processes exited")
|
||||
|
||||
for p in procs:
|
||||
p.join()
|
||||
|
|
@ -169,13 +173,12 @@ def run():
|
|||
|
||||
p.run()
|
||||
|
||||
print("Finished.")
|
||||
logger.info("Finished.")
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Will retry...", flush=True)
|
||||
logger.error("Exception occurred, will retry...", exc_info=True)
|
||||
|
||||
time.sleep(10)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ Language service abstracts prompt engineering from LLM.
|
|||
import asyncio
|
||||
import json
|
||||
import re
|
||||
import logging
|
||||
|
||||
from ...schema import Definition, Relationship, Triple
|
||||
from ...schema import Topic
|
||||
|
|
@ -17,6 +18,9 @@ from ...base import ProducerSpec, ConsumerSpec, TextCompletionClientSpec
|
|||
|
||||
from ...template import PromptManager
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "prompt"
|
||||
default_concurrency = 1
|
||||
|
||||
|
|
@ -68,10 +72,10 @@ class Processor(FlowProcessor):
|
|||
|
||||
async def on_prompt_config(self, config, version):
|
||||
|
||||
print("Loading configuration version", version)
|
||||
logger.info(f"Loading prompt configuration version {version}")
|
||||
|
||||
if self.config_key not in config:
|
||||
print(f"No key {self.config_key} in config", flush=True)
|
||||
logger.warning(f"No key {self.config_key} in config")
|
||||
return
|
||||
|
||||
config = config[self.config_key]
|
||||
|
|
@ -80,12 +84,12 @@ class Processor(FlowProcessor):
|
|||
|
||||
self.manager.load_config(config)
|
||||
|
||||
print("Prompt configuration reloaded.", flush=True)
|
||||
logger.info("Prompt configuration reloaded")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print("Exception:", e, flush=True)
|
||||
print("Configuration reload failed", flush=True)
|
||||
logger.error(f"Prompt configuration exception: {e}", exc_info=True)
|
||||
logger.error("Configuration reload failed")
|
||||
|
||||
async def on_request(self, msg, consumer, flow):
|
||||
|
||||
|
|
@ -99,19 +103,19 @@ class Processor(FlowProcessor):
|
|||
|
||||
try:
|
||||
|
||||
print(v.terms, flush=True)
|
||||
logger.debug(f"Prompt terms: {v.terms}")
|
||||
|
||||
input = {
|
||||
k: json.loads(v)
|
||||
for k, v in v.terms.items()
|
||||
}
|
||||
|
||||
print(f"Handling kind {kind}...", flush=True)
|
||||
logger.debug(f"Handling prompt kind {kind}...")
|
||||
|
||||
async def llm(system, prompt):
|
||||
|
||||
print(system, flush=True)
|
||||
print(prompt, flush=True)
|
||||
logger.debug(f"System prompt: {system}")
|
||||
logger.debug(f"User prompt: {prompt}")
|
||||
|
||||
resp = await flow("text-completion-request").text_completion(
|
||||
system = system, prompt = prompt,
|
||||
|
|
@ -120,20 +124,20 @@ class Processor(FlowProcessor):
|
|||
try:
|
||||
return resp
|
||||
except Exception as e:
|
||||
print("LLM Exception:", e, flush=True)
|
||||
logger.error(f"LLM Exception: {e}", exc_info=True)
|
||||
return None
|
||||
|
||||
try:
|
||||
resp = await self.manager.invoke(kind, input, llm)
|
||||
except Exception as e:
|
||||
print("Invocation exception:", e, flush=True)
|
||||
logger.error(f"Prompt invocation exception: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
print(resp, flush=True)
|
||||
logger.debug(f"Prompt response: {resp}")
|
||||
|
||||
if isinstance(resp, str):
|
||||
|
||||
print("Send text response...", flush=True)
|
||||
logger.debug("Sending text response...")
|
||||
|
||||
r = PromptResponse(
|
||||
text=resp,
|
||||
|
|
@ -147,8 +151,8 @@ class Processor(FlowProcessor):
|
|||
|
||||
else:
|
||||
|
||||
print("Send object response...", flush=True)
|
||||
print(json.dumps(resp, indent=4), flush=True)
|
||||
logger.debug("Sending object response...")
|
||||
logger.debug(f"Response object: {json.dumps(resp, indent=4)}")
|
||||
|
||||
r = PromptResponse(
|
||||
text=None,
|
||||
|
|
@ -162,9 +166,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Prompt service exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Sending error response...")
|
||||
|
||||
r = PromptResponse(
|
||||
error=Error(
|
||||
|
|
@ -178,9 +182,9 @@ class Processor(FlowProcessor):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}", flush=True)
|
||||
logger.error(f"Prompt service exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Sending error response...")
|
||||
|
||||
r = PromptResponse(
|
||||
error=Error(
|
||||
|
|
|
|||
|
|
@ -4,11 +4,16 @@ Document embeddings query service. Input is vector, output is an array
|
|||
of chunks
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .... direct.milvus_doc_embeddings import DocVectors
|
||||
from .... schema import DocumentEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... base import DocumentEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "de-query"
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
|
|
@ -48,7 +53,7 @@ class Processor(DocumentEmbeddingsQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying document embeddings: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,14 +4,18 @@ Document embeddings query service. Input is vector, output is an array
|
|||
of chunks. Pinecone implementation.
|
||||
"""
|
||||
|
||||
from pinecone import Pinecone, ServerlessSpec
|
||||
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
import os
|
||||
|
||||
from pinecone import Pinecone, ServerlessSpec
|
||||
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
||||
|
||||
from .... base import DocumentEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "de-query"
|
||||
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
|
||||
|
||||
|
|
@ -78,7 +82,7 @@ class Processor(DocumentEmbeddingsQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying document embeddings: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Document embeddings query service. Input is vector, output is an array
|
|||
of chunks
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PointStruct
|
||||
from qdrant_client.models import Distance, VectorParams
|
||||
|
|
@ -12,6 +14,9 @@ from .... schema import DocumentEmbeddingsResponse
|
|||
from .... schema import Error, Value
|
||||
from .... base import DocumentEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "de-query"
|
||||
|
||||
default_store_uri = 'http://localhost:6333'
|
||||
|
|
@ -63,7 +68,7 @@ class Processor(DocumentEmbeddingsQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying document embeddings: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,11 +4,16 @@ Graph embeddings query service. Input is vector, output is list of
|
|||
entities
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .... direct.milvus_graph_embeddings import EntityVectors
|
||||
from .... schema import GraphEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... base import GraphEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
default_store_uri = 'http://localhost:19530'
|
||||
|
||||
|
|
@ -68,14 +73,12 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
|
||||
entities = ents2
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Send response...")
|
||||
return entities
|
||||
|
||||
print("Done.", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying graph embeddings: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,16 +4,20 @@ Graph embeddings query service. Input is vector, output is list of
|
|||
entities. Pinecone implementation.
|
||||
"""
|
||||
|
||||
from pinecone import Pinecone, ServerlessSpec
|
||||
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
import os
|
||||
|
||||
from pinecone import Pinecone, ServerlessSpec
|
||||
from pinecone.grpc import PineconeGRPC, GRPCClientConfig
|
||||
|
||||
from .... schema import GraphEmbeddingsResponse
|
||||
from .... schema import Error, Value
|
||||
from .... base import GraphEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
default_api_key = os.getenv("PINECONE_API_KEY", "not-specified")
|
||||
|
||||
|
|
@ -107,7 +111,7 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying graph embeddings: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@ Graph embeddings query service. Input is vector, output is list of
|
|||
entities
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.models import PointStruct
|
||||
from qdrant_client.models import Distance, VectorParams
|
||||
|
|
@ -12,6 +14,9 @@ from .... schema import GraphEmbeddingsResponse
|
|||
from .... schema import Error, Value
|
||||
from .... base import GraphEmbeddingsQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "ge-query"
|
||||
|
||||
default_store_uri = 'http://localhost:6333'
|
||||
|
|
@ -85,14 +90,12 @@ class Processor(GraphEmbeddingsQueryService):
|
|||
|
||||
entities = ents2
|
||||
|
||||
print("Send response...", flush=True)
|
||||
logger.debug("Send response...")
|
||||
return entities
|
||||
|
||||
print("Done.", flush=True)
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying graph embeddings: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -4,11 +4,16 @@ Triples query service. Input is a (s, p, o) triple, some values may be
|
|||
null. Output is a list of triples.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from .... direct.cassandra import TrustGraph
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_host='localhost'
|
||||
|
|
@ -135,7 +140,7 @@ class Processor(TriplesQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying triples: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -5,12 +5,17 @@ Input is a (s, p, o) triple, some values may be null. Output is a list of
|
|||
triples.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from falkordb import FalkorDB
|
||||
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_url = 'falkor://falkordb:6379'
|
||||
|
|
@ -299,7 +304,7 @@ class Processor(TriplesQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying triples: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -5,12 +5,17 @@ Input is a (s, p, o) triple, some values may be null. Output is a list of
|
|||
triples.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from neo4j import GraphDatabase
|
||||
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_host = 'bolt://memgraph:7687'
|
||||
|
|
@ -296,9 +301,7 @@ class Processor(TriplesQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying triples: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -5,12 +5,17 @@ Input is a (s, p, o) triple, some values may be null. Output is a list of
|
|||
triples.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
||||
from neo4j import GraphDatabase
|
||||
|
||||
from .... schema import TriplesQueryRequest, TriplesQueryResponse, Error
|
||||
from .... schema import Value, Triple
|
||||
from .... base import TriplesQueryService
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "triples-query"
|
||||
|
||||
default_graph_host = 'bolt://neo4j:7687'
|
||||
|
|
@ -280,7 +285,7 @@ class Processor(TriplesQueryService):
|
|||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Exception querying triples: {e}", exc_info=True)
|
||||
raise e
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
|
||||
|
||||
|
|
@ -18,12 +22,12 @@ class Query:
|
|||
async def get_vector(self, query):
|
||||
|
||||
if self.verbose:
|
||||
print("Compute embeddings...", flush=True)
|
||||
logger.debug("Computing embeddings...")
|
||||
|
||||
qembeds = await self.rag.embeddings_client.embed(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Embeddings computed")
|
||||
|
||||
return qembeds
|
||||
|
||||
|
|
@ -32,7 +36,7 @@ class Query:
|
|||
vectors = await self.get_vector(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Get docs...", flush=True)
|
||||
logger.debug("Getting documents...")
|
||||
|
||||
docs = await self.rag.doc_embeddings_client.query(
|
||||
vectors, limit=self.doc_limit,
|
||||
|
|
@ -40,9 +44,9 @@ class Query:
|
|||
)
|
||||
|
||||
if self.verbose:
|
||||
print("Docs:", flush=True)
|
||||
logger.debug("Documents:")
|
||||
for doc in docs:
|
||||
print(doc, flush=True)
|
||||
logger.debug(f" {doc}")
|
||||
|
||||
return docs
|
||||
|
||||
|
|
@ -60,7 +64,7 @@ class DocumentRag:
|
|||
self.doc_embeddings_client = doc_embeddings_client
|
||||
|
||||
if self.verbose:
|
||||
print("Initialised", flush=True)
|
||||
logger.debug("DocumentRag initialized")
|
||||
|
||||
async def query(
|
||||
self, query, user="trustgraph", collection="default",
|
||||
|
|
@ -68,7 +72,7 @@ class DocumentRag:
|
|||
):
|
||||
|
||||
if self.verbose:
|
||||
print("Construct prompt...", flush=True)
|
||||
logger.debug("Constructing prompt...")
|
||||
|
||||
q = Query(
|
||||
rag=self, user=user, collection=collection, verbose=self.verbose,
|
||||
|
|
@ -78,9 +82,9 @@ class DocumentRag:
|
|||
docs = await q.get_docs(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Invoke LLM...", flush=True)
|
||||
print(docs)
|
||||
print(query)
|
||||
logger.debug("Invoking LLM...")
|
||||
logger.debug(f"Documents: {docs}")
|
||||
logger.debug(f"Query: {query}")
|
||||
|
||||
resp = await self.prompt_client.document_prompt(
|
||||
query = query,
|
||||
|
|
@ -88,7 +92,7 @@ class DocumentRag:
|
|||
)
|
||||
|
||||
if self.verbose:
|
||||
print("Done", flush=True)
|
||||
logger.debug("Query processing complete")
|
||||
|
||||
return resp
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Simple RAG service, performs query using document RAG an LLM.
|
|||
Input is query, output is response.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from ... schema import DocumentRagQuery, DocumentRagResponse, Error
|
||||
from . document_rag import DocumentRag
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from ... base import PromptClientSpec, EmbeddingsClientSpec
|
||||
from ... base import DocumentEmbeddingsClientSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "document-rag"
|
||||
|
||||
class Processor(FlowProcessor):
|
||||
|
|
@ -81,7 +85,7 @@ class Processor(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.info(f"Handling input {id}...")
|
||||
|
||||
if v.doc_limit:
|
||||
doc_limit = v.doc_limit
|
||||
|
|
@ -98,13 +102,13 @@ class Processor(FlowProcessor):
|
|||
properties = {"id": id}
|
||||
)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.info("Request processing complete")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Document RAG service exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Sending error response...")
|
||||
|
||||
await flow("response").send(
|
||||
DocumentRagResponse(
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
LABEL="http://www.w3.org/2000/01/rdf-schema#label"
|
||||
|
||||
|
|
@ -22,12 +26,12 @@ class Query:
|
|||
async def get_vector(self, query):
|
||||
|
||||
if self.verbose:
|
||||
print("Compute embeddings...", flush=True)
|
||||
logger.debug("Computing embeddings...")
|
||||
|
||||
qembeds = await self.rag.embeddings_client.embed(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Done.")
|
||||
|
||||
return qembeds
|
||||
|
||||
|
|
@ -36,7 +40,7 @@ class Query:
|
|||
vectors = await self.get_vector(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Get entities...", flush=True)
|
||||
logger.debug("Getting entities...")
|
||||
|
||||
entities = await self.rag.graph_embeddings_client.query(
|
||||
vectors=vectors, limit=self.entity_limit,
|
||||
|
|
@ -49,9 +53,9 @@ class Query:
|
|||
]
|
||||
|
||||
if self.verbose:
|
||||
print("Entities:", flush=True)
|
||||
logger.debug("Entities:")
|
||||
for ent in entities:
|
||||
print(" ", ent, flush=True)
|
||||
logger.debug(f" {ent}")
|
||||
|
||||
return entities
|
||||
|
||||
|
|
@ -126,7 +130,7 @@ class Query:
|
|||
entities = await self.get_entities(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Get subgraph...", flush=True)
|
||||
logger.debug("Getting subgraph...")
|
||||
|
||||
subgraph = set()
|
||||
|
||||
|
|
@ -157,12 +161,12 @@ class Query:
|
|||
sg2 = sg2[0:self.max_subgraph_size]
|
||||
|
||||
if self.verbose:
|
||||
print("Subgraph:", flush=True)
|
||||
logger.debug("Subgraph:")
|
||||
for edge in sg2:
|
||||
print(" ", str(edge), flush=True)
|
||||
logger.debug(f" {str(edge)}")
|
||||
|
||||
if self.verbose:
|
||||
print("Done.", flush=True)
|
||||
logger.debug("Done.")
|
||||
|
||||
return sg2
|
||||
|
||||
|
|
@ -183,7 +187,7 @@ class GraphRag:
|
|||
self.label_cache = {}
|
||||
|
||||
if self.verbose:
|
||||
print("Initialised", flush=True)
|
||||
logger.debug("GraphRag initialized")
|
||||
|
||||
async def query(
|
||||
self, query, user = "trustgraph", collection = "default",
|
||||
|
|
@ -192,7 +196,7 @@ class GraphRag:
|
|||
):
|
||||
|
||||
if self.verbose:
|
||||
print("Construct prompt...", flush=True)
|
||||
logger.debug("Constructing prompt...")
|
||||
|
||||
q = Query(
|
||||
rag = self, user = user, collection = collection,
|
||||
|
|
@ -205,14 +209,14 @@ class GraphRag:
|
|||
kg = await q.get_labelgraph(query)
|
||||
|
||||
if self.verbose:
|
||||
print("Invoke LLM...", flush=True)
|
||||
print(kg)
|
||||
print(query)
|
||||
logger.debug("Invoking LLM...")
|
||||
logger.debug(f"Knowledge graph: {kg}")
|
||||
logger.debug(f"Query: {query}")
|
||||
|
||||
resp = await self.prompt_client.kg_prompt(query, kg)
|
||||
|
||||
if self.verbose:
|
||||
print("Done", flush=True)
|
||||
logger.debug("Query processing complete")
|
||||
|
||||
return resp
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,16 @@ Simple RAG service, performs query using graph RAG an LLM.
|
|||
Input is query, output is response.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from ... schema import GraphRagQuery, GraphRagResponse, Error
|
||||
from . graph_rag import GraphRag
|
||||
from ... base import FlowProcessor, ConsumerSpec, ProducerSpec
|
||||
from ... base import PromptClientSpec, EmbeddingsClientSpec
|
||||
from ... base import GraphEmbeddingsClientSpec, TriplesClientSpec
|
||||
|
||||
# Module logger
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
default_ident = "graph-rag"
|
||||
default_concurrency = 1
|
||||
|
||||
|
|
@ -102,7 +106,7 @@ class Processor(FlowProcessor):
|
|||
# Sender-produced ID
|
||||
id = msg.properties()["id"]
|
||||
|
||||
print(f"Handling input {id}...", flush=True)
|
||||
logger.info(f"Handling input {id}...")
|
||||
|
||||
if v.entity_limit:
|
||||
entity_limit = v.entity_limit
|
||||
|
|
@ -139,13 +143,13 @@ class Processor(FlowProcessor):
|
|||
properties = {"id": id}
|
||||
)
|
||||
|
||||
print("Done.", flush=True)
|
||||
logger.info("Request processing complete")
|
||||
|
||||
except Exception as e:
|
||||
|
||||
print(f"Exception: {e}")
|
||||
logger.error(f"Graph RAG service exception: {e}", exc_info=True)
|
||||
|
||||
print("Send error response...", flush=True)
|
||||
logger.debug("Sending error response...")
|
||||
|
||||
await flow("response").send(
|
||||
GraphRagResponse(
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue