fix(/v1/embeddings): returning the async_gen forced FastAPI serialization which caused Pydantic errors. Also sanitized nan/inf values to floats (0.0).

Use try/finally to properly decrement usage counters in case of error.
This commit is contained in:
Alpha Nerd 2026-02-27 16:39:27 +01:00
parent d2ea65f74a
commit ad4a1d07b2

View file

@ -2,11 +2,11 @@
title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing
author: alpha-nerd-nomyo author: alpha-nerd-nomyo
author_url: https://github.com/nomyo-ai author_url: https://github.com/nomyo-ai
version: 0.6 version: 0.7
license: AGPL license: AGPL
""" """
# ------------------------------------------------------------- # -------------------------------------------------------------
import orjson, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, random, base64, io, enhance, secrets import orjson, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, random, base64, io, enhance, secrets, math
try: try:
import truststore; truststore.inject_into_ssl() import truststore; truststore.inject_into_ssl()
except ImportError: except ImportError:
@ -2637,14 +2637,17 @@ async def openai_embedding_proxy(request: Request):
oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=api_key) oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=api_key)
# 3. Async generator that streams embedding data and decrements the counter try:
async_gen = await oclient.embeddings.create(input=doc, model=model) async_gen = await oclient.embeddings.create(input=doc, model=model)
result = async_gen.model_dump()
for item in result.get("data", []):
emb = item.get("embedding")
if emb:
item["embedding"] = [0.0 if isinstance(v, float) and not math.isfinite(v) else v for v in emb]
return JSONResponse(content=result)
finally:
await decrement_usage(endpoint, tracking_model) await decrement_usage(endpoint, tracking_model)
# 5. Return a StreamingResponse backed by the generator
return async_gen
# ------------------------------------------------------------- # -------------------------------------------------------------
# 22. API route OpenAI compatible Chat Completions # 22. API route OpenAI compatible Chat Completions
# ------------------------------------------------------------- # -------------------------------------------------------------