From c1e142f55f1d8369d9564093a99593b1bccd6a52 Mon Sep 17 00:00:00 2001 From: Adil Hafeez Date: Thu, 11 Sep 2025 11:35:51 -0700 Subject: [PATCH] push demo changes --- demos/use_cases/rag_agent/arch_config.yaml | 15 ++- demos/use_cases/rag_agent/pyproject.toml | 2 + .../rag_agent/src/rag_agent/__init__.py | 17 +++- .../rag_agent/src/rag_agent/__main__.py | 4 + .../rag_agent/src/rag_agent/query_parser.py | 92 ++++++++++++++++++- demos/use_cases/rag_agent/uv.lock | 18 ++++ 6 files changed, 136 insertions(+), 12 deletions(-) create mode 100644 demos/use_cases/rag_agent/src/rag_agent/__main__.py diff --git a/demos/use_cases/rag_agent/arch_config.yaml b/demos/use_cases/rag_agent/arch_config.yaml index cf993e73..b75bdc0c 100644 --- a/demos/use_cases/rag_agent/arch_config.yaml +++ b/demos/use_cases/rag_agent/arch_config.yaml @@ -3,19 +3,16 @@ version: v0.2.0 agents: - name: query_rewriter kind: openai - endpoint: openai://localhost:10500 + endpoint: http://host.docker.internal:10500 - name: context_builder kind: openai - endpoint: openai://localhost:10501 + endpoint: http://host.docker.internal:10501 - name: response_generator kind: openai - endpoint: openai://localhost:10502 + endpoint: http://host.docker.internal:10502 - name: research_agent kind: openai - endpoint: https://localhost:10500 - - name: input_guard_rails - kind: openai - endpoint: https://localhost:10503 + endpoint: http://host.docker.internal:10503 listeners: - name: rag agent @@ -32,9 +29,9 @@ listeners: filter_chain: - research_agent - response_generator - port: 8000 + port: 8001 - - name: llm_provider + - name: egress_traffic description: llm provider configuration port: 12000 protocol: openai diff --git a/demos/use_cases/rag_agent/pyproject.toml b/demos/use_cases/rag_agent/pyproject.toml index bc0cab81..3ca7faea 100644 --- a/demos/use_cases/rag_agent/pyproject.toml +++ b/demos/use_cases/rag_agent/pyproject.toml @@ -9,6 +9,8 @@ dependencies = [ "mcp>=1.13.1", "fastmcp>=2.12.2", "pydantic>=2.11.7", + "fastapi>=0.104.1", + "uvicorn>=0.24.0", ] [project.scripts] diff --git a/demos/use_cases/rag_agent/src/rag_agent/__init__.py b/demos/use_cases/rag_agent/src/rag_agent/__init__.py index b26f3e20..429be7b0 100644 --- a/demos/use_cases/rag_agent/src/rag_agent/__init__.py +++ b/demos/use_cases/rag_agent/src/rag_agent/__init__.py @@ -9,7 +9,22 @@ mcp = None @click.option("--host", "host", default="localhost") @click.option("--port", "port", default=10101) @click.option("--agent", "agent", default=None) -def main(host, port, agent, transport): +@click.option( + "--rest-server", + "rest_server", + is_flag=True, + help="Start REST server instead of MCP server", +) +@click.option("--rest-port", "rest_port", default=8000, help="Port for REST server") +def main(host, port, agent, transport, rest_server, rest_port): + if rest_server: + print(f"Starting REST server on {host}:{rest_port}") + + from rag_agent.query_parser import start_server + + start_server(host=host, port=rest_port) + return + print(f"Starting agent(s): {agent if agent else 'all'}") global mcp mcp = FastMCP("RAG Agent Demo", host=host, port=port) diff --git a/demos/use_cases/rag_agent/src/rag_agent/__main__.py b/demos/use_cases/rag_agent/src/rag_agent/__main__.py new file mode 100644 index 00000000..868d99ef --- /dev/null +++ b/demos/use_cases/rag_agent/src/rag_agent/__main__.py @@ -0,0 +1,4 @@ +from . import main + +if __name__ == "__main__": + main() diff --git a/demos/use_cases/rag_agent/src/rag_agent/query_parser.py b/demos/use_cases/rag_agent/src/rag_agent/query_parser.py index 1bc54548..81f87063 100644 --- a/demos/use_cases/rag_agent/src/rag_agent/query_parser.py +++ b/demos/use_cases/rag_agent/src/rag_agent/query_parser.py @@ -1,5 +1,34 @@ from pydantic import BaseModel -from . import mcp +from typing import List, Optional, Dict, Any +from fastapi import FastAPI, HTTPException +import uvicorn + + +# OpenAI Chat Completions API models +class ChatMessage(BaseModel): + role: str + content: str + + +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessage] + temperature: Optional[float] = 1.0 + max_tokens: Optional[int] = None + top_p: Optional[float] = 1.0 + frequency_penalty: Optional[float] = 0.0 + presence_penalty: Optional[float] = 0.0 + stream: Optional[bool] = False + stop: Optional[List[str]] = None + + +class ChatCompletionResponse(BaseModel): + id: str + object: str = "chat.completion" + created: int + model: str + choices: List[Dict[str, Any]] + usage: Dict[str, int] class Response(BaseModel): @@ -7,7 +36,66 @@ class Response(BaseModel): metadata: dict -@mcp.tool() +# FastAPI app for REST server +app = FastAPI(title="RAG Agent Query Parser", version="1.0.0") + + +@app.post("/v1/chat/completions") +async def chat_completions(request: ChatCompletionRequest): + """Chat completions endpoint that passes through the request as-is.""" + import time + import uuid + + # Pass-through: return the last user message as the assistant response + last_user_message = "" + for message in reversed(request.messages): + if message.role == "user": + last_user_message = message.content + break + + response = ChatCompletionResponse( + id=f"chatcmpl-{uuid.uuid4().hex[:8]}", + created=int(time.time()), + model=request.model, + choices=[ + { + "index": 0, + "message": {"role": "assistant", "content": last_user_message}, + "finish_reason": "stop", + } + ], + usage={ + "prompt_tokens": sum(len(msg.content.split()) for msg in request.messages), + "completion_tokens": len(last_user_message.split()), + "total_tokens": sum(len(msg.content.split()) for msg in request.messages) + + len(last_user_message.split()), + }, + ) + + return response + + +@app.get("/health") +async def health_check(): + """Health check endpoint.""" + return {"status": "healthy"} + + def parse_query(query): """Parse the user query and returns metadata extracted from query.""" return Response(query=query, metadata={"is_valid": True}) + + +# Register MCP tool only if mcp is available +try: + from . import mcp + + if mcp is not None: + mcp.tool()(parse_query) +except (ImportError, AttributeError): + pass + + +def start_server(host: str = "localhost", port: int = 8000): + """Start the REST server.""" + uvicorn.run(app, host=host, port=port) diff --git a/demos/use_cases/rag_agent/uv.lock b/demos/use_cases/rag_agent/uv.lock index cba02e43..d17d0533 100644 --- a/demos/use_cases/rag_agent/uv.lock +++ b/demos/use_cases/rag_agent/uv.lock @@ -312,6 +312,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674 }, ] +[[package]] +name = "fastapi" +version = "0.116.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "starlette" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/d7/6c8b3bfe33eeffa208183ec037fee0cce9f7f024089ab1c5d12ef04bd27c/fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143", size = 296485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631 }, +] + [[package]] name = "fastmcp" version = "2.12.2" @@ -909,17 +923,21 @@ version = "0.1.0" source = { editable = "." } dependencies = [ { name = "click" }, + { name = "fastapi" }, { name = "fastmcp" }, { name = "mcp" }, { name = "pydantic" }, + { name = "uvicorn" }, ] [package.metadata] requires-dist = [ { name = "click", specifier = ">=8.2.1" }, + { name = "fastapi", specifier = ">=0.104.1" }, { name = "fastmcp", specifier = ">=2.12.2" }, { name = "mcp", specifier = ">=1.13.1" }, { name = "pydantic", specifier = ">=2.11.7" }, + { name = "uvicorn", specifier = ">=0.24.0" }, ] [[package]]