# nomyo-router/backends/normalize.py

"""Endpoint URL, model-name, and endpoint-classification helpers.

The endpoint classifiers read live config via ``get_config()`` so that the
startup-time rebind of ``config`` in router.py is picked up at call time.
"""
from config import get_config


def _normalize_llama_model_name(name: str) -> str:
    """Extract the model name from a huggingface-style identifier.
    e.g. 'unsloth/gpt-oss-20b-GGUF:F16' -> 'gpt-oss-20b-GGUF'
    """
    if "/" in name:
        name = name.rsplit("/", 1)[1]
    if ":" in name:
        name = name.split(":")[0]
    return name


def _extract_llama_quant(name: str) -> str:
    """Extract the quantization level from a huggingface-style identifier.
    e.g. 'unsloth/gpt-oss-20b-GGUF:Q8_0' -> 'Q8_0'
    Returns empty string if no quant suffix is present.
    """
    if ":" in name:
        return name.rsplit(":", 1)[1]
    return ""


def ep2base(ep):
    """Return *ep* as a base URL that contains "/v1".

    An endpoint that already contains "/v1" anywhere is returned unchanged;
    otherwise "/v1" is appended verbatim.
    """
    return ep if "/v1" in ep else ep + "/v1"


def dedupe_on_keys(dicts, key_fields):
    """
    Drop dicts whose values for ``key_fields`` repeat an earlier entry.

    The first dict seen for each combination of values is kept and the input
    order of the kept dicts is preserved. A key missing from a dict counts as
    ``None`` (``dict.get`` semantics).
    """
    first_by_signature = {}
    for entry in dicts:
        signature = tuple(entry.get(field) for field in key_fields)
        # setdefault only stores the first entry for a signature; dicts keep
        # insertion order, so the values come back in first-seen order.
        first_by_signature.setdefault(signature, entry)
    return list(first_by_signature.values())


def is_llama_swap(endpoint: str) -> bool:
    """Return whether *endpoint* is listed in the live config's ``llama_swap_endpoints``."""
    # Config is fetched on every call rather than cached at import time.
    swap_fronts = get_config().llama_swap_endpoints
    return endpoint in swap_fronts


def is_llama_server(endpoint: str) -> bool:
    """Return whether *endpoint* is a configured llama-server or llama-swap endpoint.

    The two kinds expose the same OpenAI-compatible surface, so the router
    handles them the same way except for loaded-model detection and model
    unload.
    """
    live_cfg = get_config()
    if endpoint in live_cfg.llama_server_endpoints:
        return True
    return endpoint in live_cfg.llama_swap_endpoints


def llama_endpoints(cfg) -> list:
    """Return llama-server endpoints followed by llama-swap endpoints, without duplicates.

    The first occurrence of each endpoint determines its position in the result.
    """
    servers = cfg.llama_server_endpoints
    swap_fronts = cfg.llama_swap_endpoints
    # A dict is used as an ordered set: update() leaves existing keys in place
    # and appends only the endpoints that were not already present.
    ordered = dict.fromkeys(servers)
    ordered.update(dict.fromkeys(swap_fronts))
    return list(ordered)


def is_ext_openai_endpoint(endpoint: str) -> bool:
    """
    Decide whether an endpoint is an external OpenAI-compatible service
    (i.e. not Ollama, llama-server or llama-swap).

    Returns True for:
    - External services like OpenAI.com, Groq, etc.

    Returns False for:
    - llama-server / llama-swap endpoints (explicitly configured)
    - Endpoints that do not contain "/v1"
    - Ollama's own /v1 surface: the endpoint with "/v1" removed is one of the
      configured ``endpoints``, or the endpoint contains ":11434"
    """
    # Configured llama endpoints and anything without "/v1" are never external.
    if is_llama_server(endpoint) or "/v1" not in endpoint:
        return False

    # Stripping "/v1" recovers the plain endpoint string to compare against
    # the configured endpoints list.
    without_v1 = endpoint.replace('/v1', '')
    is_configured_ollama = without_v1 in get_config().endpoints

    # ":11434" is treated as Ollama's default port.
    return not (is_configured_ollama or ':11434' in endpoint)


def is_openai_compatible(endpoint: str) -> bool:
    """
    Return True if the endpoint speaks the OpenAI API (not native Ollama).

    Any endpoint containing "/v1" qualifies, as does any configured
    llama-server / llama-swap endpoint.
    """
    if "/v1" in endpoint:
        return True
    return is_llama_server(endpoint)


def get_tracking_model(endpoint: str, model: str) -> str:
    """
    Map a model name to the form used as the PS table key for usage tracking.

    - External OpenAI endpoints: the name is returned unchanged (not shown in PS)
    - llama-server / llama-swap endpoints: HF prefix and quantization suffix
      are stripped
    - Everything else (Ollama): ":latest" is appended when the name has no ":"

    Applying this on every route keeps model naming consistent for tracking.
    """
    if not is_ext_openai_endpoint(endpoint):
        # llama-server / llama-swap entries are listed under the normalized name.
        if is_llama_server(endpoint):
            return _normalize_llama_model_name(model)

        # Ollama: a name without a tag gets the ":latest" tag.
        if ":" not in model:
            return model + ":latest"

    # External endpoint, or an Ollama name that already carries a tag.
    return model