From 6381dd09c3becf99a0653b8ce32514238fd75510 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Sat, 13 Sep 2025 11:24:28 +0200 Subject: [PATCH] starting an openai2ollama client translation layer with rechunking class --- router.py | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 4 deletions(-) diff --git a/router.py b/router.py index 19043fe..4c568d2 100644 --- a/router.py +++ b/router.py @@ -6,7 +6,7 @@ version: 0.3 license: AGPL """ # ------------------------------------------------------------- -import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl +import json, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, datetime from pathlib import Path from typing import Dict, Set, List, Optional from fastapi import FastAPI, Request, HTTPException @@ -257,6 +257,31 @@ async def decrement_usage(endpoint: str, model: str) -> None: # usage_counts.pop(endpoint, None) await publish_snapshot() +def iso8601_ns(): + ns_since_epoch = time.time_ns() + dt = datetime.datetime.fromtimestamp( + ns_since_epoch / 1_000_000_000, # seconds + tz=datetime.timezone.utc + ) + iso8601_with_ns = ( + dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{ns_since_epoch % 1_000_000_000:09d}Z" + ) + return iso8601_with_ns + +class rechunk: + def openai_chat_completion2ollama(chunk): + chunk = { "model": chunk.model, + "created_at": iso8601_ns() , + "done_reason": chunk.choices[0].finish_reason, + "load_duration": None, + "prompt_eval_count": None, + "prompt_eval_duration": None, + "eval_count": None, + "eval_duration": None, + "message": {"role": chunk.choices[0].delta.role, "content": chunk.choices[0].delta.content, "thinking": None, "images": None, "tool_name": None, "tool_calls": None}, + } + return chunk + # ------------------------------------------------------------------ # SSE Helpser # ------------------------------------------------------------------ @@ -473,7 +498,7 @@ async def chat_proxy(request: Request): ) if not isinstance(messages, list): raise HTTPException( - status_code=400, detail="Missing or invalid 'message' field (must be a list)" + status_code=400, detail="Missing or invalid 'messages' field (must be a list)" ) except json.JSONDecodeError as e: raise HTTPException(status_code=400, detail=f"Invalid JSON: {e}") from e @@ -481,20 +506,41 @@ async def chat_proxy(request: Request): # 2. Endpoint logic endpoint = await choose_endpoint(model) await increment_usage(endpoint, model) - client = ollama.AsyncClient(host=endpoint) + if "/v1" in endpoint: + params = { + "messages": messages, + "model": model, + } + + optional_params = { + "tools": tools, + "stream": stream, + } + + params.update({k: v for k, v in optional_params.items() if v is not None}) + oclient = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint]) + else: + client = ollama.AsyncClient(host=endpoint) # 3. Async generator that streams chat data and decrements the counter async def stream_chat_response(): try: # The chat method returns a generator of dicts (or GenerateResponse) - async_gen = await client.chat(model=model, messages=messages, tools=tools, stream=stream, think=think, format=format, options=options, keep_alive=keep_alive) + if "/v1" in endpoint: + async_gen = await oclient.chat.completions.create(**params) + else: + async_gen = await client.chat(model=model, messages=messages, tools=tools, stream=stream, think=think, format=format, options=options, keep_alive=keep_alive) if stream == True: async for chunk in async_gen: + if "/v1" in endpoint: + print(chunk) + chunk = rechunk.openai_chat_completion2ollama(chunk) # `chunk` can be a dict or a pydantic model – dump to JSON safely if hasattr(chunk, "model_dump_json"): json_line = chunk.model_dump_json() else: json_line = json.dumps(chunk) + print(json_line) yield json_line.encode("utf-8") + b"\n" else: json_line = (