2024-11-07 22:11:00 -06:00
|
|
|
import os
|
2024-10-08 12:40:24 -07:00
|
|
|
|
2024-12-04 16:41:30 -08:00
|
|
|
from app.commons.globals import handler_map
|
2024-12-05 11:00:22 -08:00
|
|
|
from app.model_handler.base_handler import ChatMessage
|
2024-12-04 16:41:30 -08:00
|
|
|
from app.model_handler.guardrails import GuardRequest
|
|
|
|
|
|
2024-12-05 15:19:41 -08:00
|
|
|
from fastapi import FastAPI, Response
|
2024-11-07 22:11:00 -06:00
|
|
|
from opentelemetry import trace
|
|
|
|
|
from opentelemetry.sdk.trace import TracerProvider
|
|
|
|
|
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
|
|
|
|
|
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
|
|
|
|
|
from opentelemetry.sdk.trace.export import BatchSpanProcessor
|
|
|
|
|
from opentelemetry.sdk.resources import Resource
|
|
|
|
|
|
2024-12-04 16:41:30 -08:00
|
|
|
|
2024-11-07 22:11:00 -06:00
|
|
|
# Describe this service to OpenTelemetry and install the global tracer provider.
resource = Resource.create({"service.name": "model-server"})
trace.set_tracer_provider(TracerProvider(resource=resource))

# Module-level tracer, named after this module.
tracer = trace.get_tracer(__name__)
|
|
|
|
|
|
|
|
|
|
|
2024-07-18 14:04:51 -07:00
|
|
|
# Create the FastAPI application and attach OpenTelemetry instrumentation to it.
app = FastAPI()
FastAPIInstrumentor().instrument_app(app)
|
|
|
|
|
|
|
|
|
|
# Endpoint used when the OTLP_HOST environment variable is not set.
# An earlier default, kept for reference: "http://localhost:4317".
# NOTE(review): "none" is still handed to the exporter as its endpoint below;
# confirm this is the intended way to leave span export unconfigured.
DEFAULT_OTLP_HOST = "none"

# Configure the OTLP exporter (Jaeger, Zipkin, etc.) and register it with the
# tracer provider through a batching span processor.
otlp_exporter = OTLPSpanExporter(
    endpoint=os.environ.get("OTLP_HOST", DEFAULT_OTLP_HOST),
)
trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(otlp_exporter))
|
|
|
|
|
|
2024-10-09 11:25:07 -07:00
|
|
|
|
2024-07-30 16:23:23 -07:00
|
|
|
@app.get("/healthz")
async def healthz():
    """Return a fixed payload indicating the service is reachable."""
    payload = {"status": "ok"}
    return payload
|
|
|
|
|
|
2024-10-07 15:21:05 -07:00
|
|
|
|
2024-07-18 14:04:51 -07:00
|
|
|
@app.get("/models")
async def models():
    """List every model name registered in ``handler_map``.

    Returns:
        A dict with ``"object": "list"`` and a ``"data"`` list holding one
        ``{"id": <name>, "object": "model"}`` entry per key of ``handler_map``.
    """
    model_entries = [{"id": name, "object": "model"} for name in handler_map]
    return {"object": "list", "data": model_entries}
|
2024-07-18 14:04:51 -07:00
|
|
|
|
|
|
|
|
|
2024-12-04 16:41:30 -08:00
|
|
|
@app.post("/function_calling")
async def function_calling(req: ChatMessage, res: Response):
    """Run intent detection and, if an intent is found, function calling.

    The request is first sent to the ``Arch-Intent`` handler. When that handler
    reports an intent, the same request is forwarded to the ``Arch-Function``
    handler and its completion is returned unchanged.

    Args:
        req: Chat request passed as-is to both handlers.
        res: Response object; its status code is set to 500 on failure.

    Returns:
        The ``Arch-Function`` completion when an intent is detected, ``None``
        when no intent is detected, or an ``{"error": ...}`` dict naming the
        failing stage when an exception is raised.
    """
    try:
        intent_completion = await handler_map["Arch-Intent"].chat_completion(req)

        if not handler_map["Arch-Intent"].detect_intent(intent_completion):
            # [TODO] Review: define the behavior if `Arch-Intent` doesn't detect an intent
            return None

        # [TODO] measure agreement between intent detection and function calling
        try:
            return await handler_map["Arch-Function"].chat_completion(req)
        except Exception as function_err:
            # [TODO] Review: update how to collect debugging outputs
            res.status_code = 500
            return {"error": f"[Arch-Function] - {function_err}"}
    except Exception as intent_err:
        # [TODO] Review: update how to collect debugging outputs
        res.status_code = 500
        return {"error": f"[Arch-Intent] - {intent_err}"}
|
2024-10-04 11:05:25 -07:00
|
|
|
|
|
|
|
|
|
2024-12-04 16:41:30 -08:00
|
|
|
@app.post("/guardrails")
async def guardrails(req: GuardRequest, res: Response, max_num_words=300):
    """Evaluate a request with the ``Arch-Guard`` handler.

    Args:
        req: Guard request passed to the handler's ``predict`` method.
        res: Response object; its status code is set to 500 on failure.
        max_num_words: Not read anywhere in this function's body.

    Returns:
        The handler's prediction, or an ``{"error": ...}`` dict when an
        exception is raised.
    """
    try:
        return handler_map["Arch-Guard"].predict(req)
    except Exception as guard_err:
        # [TODO] Review: update how to collect debugging outputs
        res.status_code = 500
        return {"error": f"[Arch-Guard] - {guard_err}"}
|