"""Endpoint URL, model-name, and endpoint-classification helpers.

The endpoint classifiers read live config via ``get_config()`` so that the
startup-time rebind of ``config`` in router.py is picked up at call time.
"""
from config import get_config


def _normalize_llama_model_name(name: str) -> str:
    """Extract the model name from a huggingface-style identifier.

    Everything up to and including the last ``/`` is dropped, then everything
    from the first remaining ``:`` onwards is dropped.

    e.g. 'unsloth/gpt-oss-20b-GGUF:F16' -> 'gpt-oss-20b-GGUF'
    """
    # rpartition yields ('', '', name) when there is no "/", so index 2 is
    # always the part after the last slash (or the whole name).
    tail = name.rpartition("/")[2]
    return tail.partition(":")[0]


def _extract_llama_quant(name: str) -> str:
    """Extract the quantization level from a huggingface-style identifier.

    e.g. 'unsloth/gpt-oss-20b-GGUF:Q8_0' -> 'Q8_0'

    Returns empty string if no quant suffix is present.
    """
    _, sep, quant = name.rpartition(":")
    # Without a ":" rpartition puts the whole name in the last slot, so the
    # separator must be checked explicitly.
    return quant if sep else ""


def ep2base(ep: str) -> str:
    """Return the OpenAI-style base URL (ending in ``/v1``) for an endpoint.

    An endpoint that already contains ``/v1`` anywhere is returned unchanged.
    Otherwise ``/v1`` is appended; trailing slashes are stripped first so that
    'http://host:8080/' becomes 'http://host:8080/v1' rather than
    'http://host:8080//v1'.
    """
    if "/v1" in ep:
        return ep
    return ep.rstrip("/") + "/v1"


def dedupe_on_keys(dicts, key_fields) -> list:
    """Deduplicate endpoint details based on the given dict keys.

    Two dicts are considered duplicates when their values for every key in
    ``key_fields`` are equal (a missing key counts as ``None``). The first
    occurrence is kept and the input order is preserved. The values of the
    chosen keys must be hashable.
    """
    seen = set()
    out = []
    for d in dicts:
        # Build a tuple of the values for the chosen keys
        key = tuple(d.get(k) for k in key_fields)
        if key not in seen:
            seen.add(key)
            out.append(d)
    return out


def is_llama_swap(endpoint: str) -> bool:
    """True if the endpoint is a configured llama-swap front."""
    return endpoint in get_config().llama_swap_endpoints


def is_llama_server(endpoint: str) -> bool:
    """True for a llama.cpp llama-server OR a llama-swap front.

    Both speak the same OpenAI-compatible surface, so the router treats them
    identically everywhere except loaded-model detection and model unload.
    """
    cfg = get_config()
    return (
        endpoint in cfg.llama_server_endpoints
        or endpoint in cfg.llama_swap_endpoints
    )


def llama_endpoints(cfg) -> list:
    """Combined, de-duplicated llama-server + llama-swap endpoints (order preserved)."""
    # dict.fromkeys keeps first-seen order while dropping repeats.
    return list(dict.fromkeys([*cfg.llama_server_endpoints, *cfg.llama_swap_endpoints]))


def is_ext_openai_endpoint(endpoint: str) -> bool:
    """Determine if an endpoint is an external OpenAI-compatible endpoint
    (not Ollama, llama-server or llama-swap).

    Returns True for:
    - External services like OpenAI.com, Groq, etc.

    Returns False for:
    - llama-server / llama-swap endpoints (explicitly configured)
    - Ollama endpoints: no ``/v1`` in the URL, a ``/v1`` URL whose base
      (with ``/v1`` removed) is listed in the configured ``endpoints``,
      or a URL on the default Ollama port 11434
    """
    # Configured llama-server / llama-swap endpoints are never "external".
    if is_llama_server(endpoint):
        return False

    # Without /v1 the endpoint is treated as native Ollama.
    if "/v1" not in endpoint:
        return False

    base_endpoint = endpoint.replace('/v1', '')
    if base_endpoint in get_config().endpoints:
        return False  # It's Ollama's /v1

    # Check for default Ollama port
    if ':11434' in endpoint:
        return False  # It's Ollama

    return True  # It's an external OpenAI endpoint


def is_openai_compatible(endpoint: str) -> bool:
    """Return True if the endpoint speaks the OpenAI API (not native Ollama).

    This includes external OpenAI endpoints AND llama-server / llama-swap
    endpoints.
    """
    return "/v1" in endpoint or is_llama_server(endpoint)


def get_tracking_model(endpoint: str, model: str) -> str:
    """Normalize model name for tracking purposes so it matches the PS table key.

    - For llama-server endpoints: strips HF prefix and quantization suffix
    - For Ollama endpoints: appends ":latest" if no version suffix is present
    - For external OpenAI endpoints: returns as-is (not shown in PS)

    This ensures consistent model naming across all routes for usage tracking.
    """
    # External OpenAI endpoints are not shown in PS, keep as-is
    if is_ext_openai_endpoint(endpoint):
        return model

    # llama-server / llama-swap endpoints use normalized names in PS
    if is_llama_server(endpoint):
        return _normalize_llama_model_name(model)

    # Ollama endpoints: append ":latest" if no version suffix
    if ":" not in model:
        return model + ":latest"
    return model