2026-05-19 12:05:51 +02:00
|
|
|
"""aiohttp / OpenAI client factories aware of Unix-socket endpoints.
|
|
|
|
|
|
|
|
|
|
Unix socket endpoints follow the ``.sock`` hostname convention (e.g.
|
|
|
|
|
``http://192.168.0.52.sock/v1``) and resolve to ``/run/user/<uid>/<host>``.
|
|
|
|
|
Their sessions/clients live in ``state.app_state`` so that startup can
|
|
|
|
|
populate them once and routes can reuse them.
|
|
|
|
|
"""
|
|
|
|
|
import os
|
|
|
|
|
|
|
|
|
|
import aiohttp
|
|
|
|
|
import openai
|
|
|
|
|
|
|
|
|
|
from state import app_state
|
|
|
|
|
from backends.normalize import ep2base
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_unix_socket_endpoint(endpoint: str) -> bool:
    """Return True if *endpoint* uses the ``.sock`` hostname convention.

    The host is whatever sits between the first ``//`` and the next ``/``,
    with anything from the first ``:`` onwards (e.g. a port) dropped. A URL
    such as ``http://192.168.0.52.sock/v1`` therefore counts as a Unix
    socket endpoint; ``_get_socket_path`` maps such a URL to
    ``/run/user/<uid>/<host>`` rather than a TCP address.

    Args:
        endpoint: Endpoint URL to inspect.

    Returns:
        True when the host ends with ``.sock``; False otherwise, including
        when *endpoint* contains no ``//`` separator at all.
    """
    _, separator, remainder = endpoint.partition("//")
    if not separator:
        # Without a "//" there is no host component to inspect.
        return False
    host = remainder.split("/", 1)[0].split(":", 1)[0]
    return host.endswith(".sock")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_socket_path(endpoint: str) -> str:
    """Map a ``.sock`` endpoint URL to its Unix socket file path.

    The host is taken from between the first ``//`` and the following
    ``/``, minus anything from the first ``:`` onwards, and placed under
    the calling user's ``/run/user/<uid>`` directory.

    Example:
        ``http://192.168.0.52.sock/v1`` ->
        ``/run/user/<uid>/192.168.0.52.sock``

    Raises:
        IndexError: If *endpoint* contains no ``//`` separator.
    """
    after_scheme = endpoint.split("//", 1)[1]
    authority = after_scheme.split("/")[0]
    socket_name = authority.split(":")[0]
    uid = os.getuid()
    return f"/run/user/{uid}/{socket_name}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_session(endpoint: str) -> aiohttp.ClientSession:
    """Pick the aiohttp session that should carry requests to *endpoint*.

    A ``.sock`` endpoint uses the session registered for it under
    ``app_state["socket_sessions"]``. Every other endpoint, and a ``.sock``
    endpoint with no registered session, gets the shared
    ``app_state["session"]``.
    """
    if not _is_unix_socket_endpoint(endpoint):
        return app_state["session"]
    dedicated = app_state["socket_sessions"].get(endpoint)
    # Fall back to the shared session when nothing was registered.
    return app_state["session"] if dedicated is None else dedicated
|
|
|
|
|
|
|
|
|
|
|
2026-05-28 09:54:53 +02:00
|
|
|
def get_probe_session(endpoint: str) -> aiohttp.ClientSession:
    """Pick the session for lightweight health/introspection probes.

    Probes (available/loaded models, endpoint health) are meant to run on
    a connection pool separate from the proxy/streaming session. Otherwise
    a burst of long-lived completion requests could starve them: a probe
    would queue for a connection, miss its deadline, and flag a healthy
    endpoint as unavailable under load.

    Resolution order:
        1. For a ``.sock`` endpoint, the session registered for it in
           ``app_state["socket_sessions"]``, if any.
        2. ``app_state["probe_session"]`` when present and truthy.
        3. ``app_state["session"]`` as the fallback (e.g. in tests, where
           the probe pool has not been initialised).
    """
    if _is_unix_socket_endpoint(endpoint):
        dedicated = app_state["socket_sessions"].get(endpoint)
        if dedicated is not None:
            return dedicated

    probe_pool = app_state.get("probe_session")
    if probe_pool:
        return probe_pool
    return app_state["session"]
|
|
|
|
|
|
|
|
|
|
|
2026-05-19 12:05:51 +02:00
|
|
|
def _make_openai_client(
    endpoint: str,
    default_headers: dict | None = None,
    api_key: str = "no-key",
) -> openai.AsyncOpenAI:
    """Build an AsyncOpenAI client for *endpoint*.

    The base URL normally comes from ``ep2base(endpoint)``. For a ``.sock``
    endpoint that has an entry in ``app_state["httpx_clients"]``, that
    pre-created HTTP client is handed to the SDK so it connects through
    the socket instead of TCP, and the base URL is replaced with
    ``http://localhost/v1``.

    Args:
        endpoint: Endpoint URL the client should talk to.
        default_headers: Optional headers forwarded to the SDK; omitted
            from the constructor call entirely when None.
        api_key: API key passed to the SDK.

    Returns:
        The configured ``openai.AsyncOpenAI`` instance.
    """
    client_kwargs: dict = {"api_key": api_key}
    if default_headers is not None:
        client_kwargs["default_headers"] = default_headers

    target_url = ep2base(endpoint)
    uds_client = (
        app_state["httpx_clients"].get(endpoint)
        if _is_unix_socket_endpoint(endpoint)
        else None
    )
    if uds_client is not None:
        client_kwargs["http_client"] = uds_client
        # NOTE(review): presumably the URL host is only a placeholder once
        # the transport is bound to the socket file; confirm.
        target_url = "http://localhost/v1"
    return openai.AsyncOpenAI(base_url=target_url, **client_kwargs)
|