compliance with ollama naming conventions and openai model['id']

2025-09-15 17:39:15 +02:00 · 2025-09-15 17:39:15 +02:00 · 4b5834d7df
commit 4b5834d7df
parent 733e215be2
1 changed files with 21 additions and 7 deletions
--- a/router.py
+++ b/router.py
@ -393,6 +393,15 @@ async def choose_endpoint(model: str) -> str:
    ]
    # 6️⃣ 
    if not candidate_endpoints:
        if ":latest" in model:  #ollama naming convention not applicable to openai
            model = model.split(":")
            model = model[0]
            print(model)
            candidate_endpoints = [
                ep for ep, models in zip(config.endpoints, advertised_sets)
                if model in models
            ]
        if not candidate_endpoints:
            raise RuntimeError(
                f"None of the configured endpoints ({', '.join(config.endpoints)}) "
@ -472,9 +481,11 @@ async def proxy(request: Request):
    endpoint = await choose_endpoint(model)
    await increment_usage(endpoint, model)
    is_openai_endpoint = "/v1" in endpoint
    if is_openai_endpoint:
        if ":latest" in model:
            model = model.split(":")
            model = model[0]
        params = {
            "prompt": prompt, 
            "model": model,
@ -488,6 +499,7 @@ async def proxy(request: Request):
        oclient = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint])
    else:
        client = ollama.AsyncClient(host=endpoint)
    await increment_usage(endpoint, model)
    # 4. Async generator that streams data and decrements the counter
    async def stream_generate_response():
@ -564,9 +576,11 @@ async def chat_proxy(request: Request):
    # 2. Endpoint logic
    endpoint = await choose_endpoint(model)
    await increment_usage(endpoint, model)
    is_openai_endpoint = "/v1" in endpoint
    if is_openai_endpoint:
        if ":latest" in model:
            model = model.split(":")
            model = model[0]
        params = {
            "messages": messages, 
            "model": model,
@ -581,7 +595,7 @@ async def chat_proxy(request: Request):
        oclient = openai.AsyncOpenAI(base_url=endpoint, api_key=config.api_keys[endpoint])
    else:
        client = ollama.AsyncClient(host=endpoint)
-
+    await increment_usage(endpoint, model)
    # 3. Async generator that streams chat data and decrements the counter
    async def stream_chat_response():
        try: