From ad4a1d07b2a7a8d5ba45cb82a7846fde5a0db887 Mon Sep 17 00:00:00 2001 From: alpha-nerd-nomyo Date: Fri, 27 Feb 2026 16:39:27 +0100 Subject: [PATCH] fix(/v1/embeddings): returning the async_gen forced FastAPI serialization which caused Pydantic Errors. Also sanitized NaN/inf embedding values by replacing them with 0.0. Use try/finally to properly decrement usage counters in case of error. --- router.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/router.py b/router.py index 1ef096a..ec495aa 100644 --- a/router.py +++ b/router.py @@ -2,11 +2,11 @@ title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing author: alpha-nerd-nomyo author_url: https://github.com/nomyo-ai -version: 0.6 +version: 0.7 license: AGPL """ # ------------------------------------------------------------- -import orjson, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, random, base64, io, enhance, secrets +import orjson, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, random, base64, io, enhance, secrets, math try: import truststore; truststore.inject_into_ssl() except ImportError: @@ -2637,13 +2637,16 @@ async def openai_embedding_proxy(request: Request): oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=api_key) - # 3. Async generator that streams embedding data and decrements the counter - async_gen = await oclient.embeddings.create(input=doc, model=model) - - await decrement_usage(endpoint, tracking_model) - - # 5. 
Return a StreamingResponse backed by the generator - return async_gen + try: + async_gen = await oclient.embeddings.create(input=doc, model=model) + result = async_gen.model_dump() + for item in result.get("data", []): + emb = item.get("embedding") + if emb: + item["embedding"] = [0.0 if isinstance(v, float) and not math.isfinite(v) else v for v in emb] + return JSONResponse(content=result) + finally: + await decrement_usage(endpoint, tracking_model) # ------------------------------------------------------------- # 22. API route – OpenAI compatible Chat Completions