"""aiohttp / OpenAI client factories aware of Unix-socket endpoints. Unix socket endpoints follow the ``.sock`` hostname convention (e.g. ``http://192.168.0.52.sock/v1``) and resolve to ``/run/user//``. Their sessions/clients live in ``state.app_state`` so that startup can populate them once and routes can reuse them. """ import os import aiohttp import openai from state import app_state from backends.normalize import ep2base def _is_unix_socket_endpoint(endpoint: str) -> bool: """Return True if endpoint uses Unix socket (.sock hostname convention). Detects URLs like http://192.168.0.52.sock/v1 where the host ends with .sock, indicating the connection should use a Unix domain socket at /tmp/ instead of TCP. """ try: host = endpoint.split("//", 1)[1].split("/")[0].split(":")[0] return host.endswith(".sock") except IndexError: return False def _get_socket_path(endpoint: str) -> str: """Derive Unix socket file path from a .sock endpoint URL. http://192.168.0.52.sock/v1 -> /run/user//192.168.0.52.sock """ host = endpoint.split("//", 1)[1].split("/")[0].split(":")[0] return f"/run/user/{os.getuid()}/{host}" def get_session(endpoint: str) -> aiohttp.ClientSession: """Return the appropriate aiohttp session for the given endpoint. Unix socket endpoints (.sock) get their own UnixConnector session. All other endpoints share the main TCP session. """ if _is_unix_socket_endpoint(endpoint): sess = app_state["socket_sessions"].get(endpoint) if sess is not None: return sess return app_state["session"] def get_probe_session(endpoint: str) -> aiohttp.ClientSession: """Return the session used for lightweight health/introspection probes. Probes (available/loaded models, endpoint health) run on a connection pool kept separate from the proxy/streaming session, so a burst of long-lived completion requests cannot starve them — otherwise a probe would queue waiting for a connection, hit its deadline, and mark a perfectly healthy endpoint as unavailable under load. Unix socket endpoints keep their dedicated per-endpoint session. TCP endpoints use the shared probe session, falling back to the main session when the probe pool has not been initialised (e.g. in tests). """ if _is_unix_socket_endpoint(endpoint): sess = app_state["socket_sessions"].get(endpoint) if sess is not None: return sess return app_state.get("probe_session") or app_state["session"] def _make_openai_client( endpoint: str, default_headers: dict | None = None, api_key: str = "no-key", ) -> openai.AsyncOpenAI: """Return an AsyncOpenAI client configured for the given endpoint. For Unix socket endpoints, injects a pre-created httpx UDS transport so the OpenAI SDK connects via the socket instead of TCP. """ base_url = ep2base(endpoint) kwargs: dict = {"api_key": api_key} if default_headers is not None: kwargs["default_headers"] = default_headers if _is_unix_socket_endpoint(endpoint): http_client = app_state["httpx_clients"].get(endpoint) if http_client is not None: kwargs["http_client"] = http_client base_url = "http://localhost/v1" return openai.AsyncOpenAI(base_url=base_url, **kwargs)