fix(/v1/embeddings): returning the async_gen forced FastAPI serialization, which caused Pydantic errors. Also sanitized NaN/Inf values to floats (0.0).
Use try/finally to properly decrement usage counters in case of error.
This commit is contained in:
parent
d2ea65f74a
commit
ad4a1d07b2
1 changed files with 12 additions and 9 deletions
17
router.py
17
router.py
|
|
@ -2,11 +2,11 @@
|
||||||
title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing
|
title: NOMYO Router - an Ollama Proxy with Endpoint:Model aware routing
|
||||||
author: alpha-nerd-nomyo
|
author: alpha-nerd-nomyo
|
||||||
author_url: https://github.com/nomyo-ai
|
author_url: https://github.com/nomyo-ai
|
||||||
version: 0.6
|
version: 0.7
|
||||||
license: AGPL
|
license: AGPL
|
||||||
"""
|
"""
|
||||||
# -------------------------------------------------------------
|
# -------------------------------------------------------------
|
||||||
import orjson, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, random, base64, io, enhance, secrets
|
import orjson, time, asyncio, yaml, ollama, openai, os, re, aiohttp, ssl, random, base64, io, enhance, secrets, math
|
||||||
try:
|
try:
|
||||||
import truststore; truststore.inject_into_ssl()
|
import truststore; truststore.inject_into_ssl()
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
|
@ -2637,14 +2637,17 @@ async def openai_embedding_proxy(request: Request):
|
||||||
|
|
||||||
oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=api_key)
|
oclient = openai.AsyncOpenAI(base_url=base_url, default_headers=default_headers, api_key=api_key)
|
||||||
|
|
||||||
# 3. Async generator that streams embedding data and decrements the counter
|
try:
|
||||||
async_gen = await oclient.embeddings.create(input=doc, model=model)
|
async_gen = await oclient.embeddings.create(input=doc, model=model)
|
||||||
|
result = async_gen.model_dump()
|
||||||
|
for item in result.get("data", []):
|
||||||
|
emb = item.get("embedding")
|
||||||
|
if emb:
|
||||||
|
item["embedding"] = [0.0 if isinstance(v, float) and not math.isfinite(v) else v for v in emb]
|
||||||
|
return JSONResponse(content=result)
|
||||||
|
finally:
|
||||||
await decrement_usage(endpoint, tracking_model)
|
await decrement_usage(endpoint, tracking_model)
|
||||||
|
|
||||||
# 5. Return a StreamingResponse backed by the generator
|
|
||||||
return async_gen
|
|
||||||
|
|
||||||
# -------------------------------------------------------------
|
# -------------------------------------------------------------
|
||||||
# 22. API route – OpenAI compatible Chat Completions
|
# 22. API route – OpenAI compatible Chat Completions
|
||||||
# -------------------------------------------------------------
|
# -------------------------------------------------------------
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue