"""Phase 4 daemon concurrency primitives (DAEMON-04, DAEMON-05).

Persistent-fd flock wrapper. Hold one instance for the process lifetime.
Uses fcntl.flock (NOT lockf) -- closing an unrelated fd does not release the
lock (see apenwarr 2010, Pitfall 2).

Constitutional guard:
- C1 HUMAN-FIRST: ProcessLock.try_acquire_exclusive is non-blocking; daemon
  yields immediately when any shared lockholder exists.
- C-USER-CONSENT (formerly C2 per D7-16): the user_initiated_sleep
  branch of _dispatch_socket_request only sets pending flags after receiving
  an explicit consent payload from the wrapper; the FSM transition itself is
  performed by _tick_body, never by the dispatcher (C-DISPATCHER-FSM-ISOLATION).
- C-DISPATCHER-FSM-ISOLATION (Phase 7 structural; supersedes the bare `C2`
  inline-comment shorthand previously used at the FSM-yield call sites): the
  socket dispatcher MUST NOT transition the FSM directly; it only sets pending
  flags consumed by _tick_body under the FSM lock. New socket_server
  inherits this invariant.
- T-04-06 mitigation: flock is bound to process + open-file-description,
  so closing an unrelated fd (e.g. /etc/passwd) does NOT release our lock.
- T-04-02 mitigation: cleanup_stale_socket + asyncio cleanup_socket kwarg
  survive SIGKILL-orphaned sockets.
- T-04-07 mitigation: lock + socket created with mode 0o600 so cross-user
  access requires OS privilege escalation (out of scope).

This module has NO LLM code and NO paid-API env var references.
"""
from __future__ import annotations

import asyncio
import errno
import fcntl
import inspect
import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Awaitable, Callable

# Default on-disk locations, both under the per-user ~/.iai-mcp directory.
LOCK_PATH: Path = Path.home() / ".iai-mcp" / ".lock"
SOCKET_PATH: Path = Path.home() / ".iai-mcp" / ".daemon.sock"


class ProcessLock:
    """Persistent-fd flock wrapper.

    Hold one instance per process for the entire process lifetime.
    fcntl.flock (BSD) NOT lockf (POSIX) -- closing an unrelated fd does NOT
    release our lock (see apenwarr 2010, Pitfall 2).

    Semantics:
    - acquire_shared(): blocking LOCK_SH (MCP pattern)
    - try_acquire_exclusive(): LOCK_EX | LOCK_NB (daemon heavy-op pattern)
    - holds_exclusive_nb(): cooperative-yield probe
    - release(): LOCK_UN (release without closing fd)
    - close(): os.close() the fd (shutdown only)
    """

    def __init__(self, path: Path = LOCK_PATH) -> None:
        """Open (creating if needed) the lock file and keep its fd.

        Args:
            path: Lock file location; missing parent directories are created.

        Raises:
            OSError: If the directory or the lock file cannot be created/opened.
        """
        path.parent.mkdir(parents=True, exist_ok=True)
        # O_CREAT so the lock file is created if missing; mode 0o600 keeps it
        # user-only on creation.
        self._fd: int | None = os.open(path, os.O_RDWR | os.O_CREAT, 0o600)
        # Force 0o600 even for a pre-existing file. fchmod works on the fd we
        # already hold, so it always hits the file we will lock -- a
        # path-based chmod could land on whatever the path names by then.
        try:
            os.fchmod(self._fd, 0o600)
        except OSError:
            # Best effort (e.g. file owned by another user); locking still works.
            pass
        self._path = path

    def _flock_exclusive_nb(self) -> bool:
        """Attempt LOCK_EX | LOCK_NB on our fd; True on success, False on contention.

        Returns False when the lock has been closed. Any OSError that is not
        a would-block condition is re-raised.
        """
        if self._fd is None:
            return False
        try:
            fcntl.flock(self._fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except BlockingIOError:
            return False
        except OSError as exc:
            # Some platforms surface contention as a plain OSError.
            if exc.errno in (errno.EAGAIN, errno.EWOULDBLOCK):
                return False
            raise
        return True

    def acquire_shared(self) -> None:
        """Blocking LOCK_SH. MCP sessions call this at session start.

        Raises:
            RuntimeError: If close() has already been called.
        """
        if self._fd is None:
            raise RuntimeError("ProcessLock closed; cannot acquire")
        fcntl.flock(self._fd, fcntl.LOCK_SH)

    def try_acquire_exclusive(self) -> bool:
        """Non-blocking LOCK_EX | LOCK_NB.

        Returns True if acquired, False if another holder blocks us or the
        lock has been closed. Daemon calls this before heavy ops; False ->
        yield to MCP.

        NOTE(review): per flock(2), converting a held shared lock to exclusive
        is not guaranteed atomic -- confirm no caller relies on keeping its
        shared lock when this returns False.
        """
        return self._flock_exclusive_nb()

    def holds_exclusive_nb(self) -> bool:
        """D-06 cooperative-yield probe.

        Non-blocking check implemented as flock(LOCK_EX | LOCK_NB) on our
        existing fd. Re-acquiring a lock this fd already holds succeeds, so
        True means "we hold the exclusive lock now". False means another
        open file description holds a conflicting lock, or the lock has been
        closed.

        Side effect: because the probe is a real flock call, it ACQUIRES the
        exclusive lock when this fd did not hold it and nobody else does.
        """
        return self._flock_exclusive_nb()

    def release(self) -> None:
        """LOCK_UN: release lock but keep fd open for later reacquisition.

        No-op once the lock has been closed.
        """
        if self._fd is None:
            return
        fcntl.flock(self._fd, fcntl.LOCK_UN)

    def close(self) -> None:
        """Close fd. Only call at process shutdown -- closing releases the lock.

        Idempotent: the fd is forgotten even if os.close() raises, so a
        second call does nothing.
        """
        if self._fd is not None:
            try:
                os.close(self._fd)
            finally:
                self._fd = None
def cleanup_stale_socket(path: Path = SOCKET_PATH) -> None:
    """Remove a stale socket file left over from a SIGKILL-orphaned daemon.

    Pitfall 10 mitigation: the in-process case is handled either by the
    3.13+ kwarg (see serve_control_socket) or by the 3.12 finally-block
    emulation, but a prior daemon killed with SIGKILL never got to run its
    cleanup. Call this BEFORE the server binds.

    The path is unlinked whatever kind of file it is. The call is best
    effort and never raises OSError.

    Args:
        path: Filesystem location of the control socket.
    """
    try:
        path.unlink()
    except OSError:
        # Covers FileNotFoundError (nothing to clean) as well as failures
        # such as a permission error. Retrying the same unlink would fail
        # the same way, so give up and let the subsequent bind surface
        # EADDRINUSE if the path is still occupied.
        pass
def _validate_socket_message(req: dict) -> tuple[bool, str | None]:
|
||
|
|
"""Per-type schema validation (ASVS V5).
|
||
|
|
|
||
|
|
Returns (ok, error_message). `req` must already be known to be a dict.
|
||
|
|
"""
|
||
|
|
req_type = req.get("type")
|
||
|
|
if not isinstance(req_type, str):
|
||
|
|
return False, "type must be a string"
|
||
|
|
|
||
|
|
if req_type == "status":
|
||
|
|
# No required fields.
|
||
|
|
return True, None
|
||
|
|
|
||
|
|
if req_type == "user_initiated_sleep":
|
||
|
|
reason = req.get("reason")
|
||
|
|
ts = req.get("ts")
|
||
|
|
if not isinstance(reason, str):
|
||
|
|
return False, "reason must be a string"
|
||
|
|
if not isinstance(ts, str):
|
||
|
|
return False, "ts must be a string"
|
||
|
|
return True, None
|
||
|
|
|
||
|
|
if req_type in ("force_wake", "force_rem"):
|
||
|
|
ts = req.get("ts")
|
||
|
|
if not isinstance(ts, str):
|
||
|
|
return False, "ts must be a string"
|
||
|
|
return True, None
|
||
|
|
|
||
|
|
if req_type in ("pause", "resume"):
|
||
|
|
# pause may optionally carry `seconds`; we don't persist it as a timer
|
||
|
|
# (the flag is binary) but we DO validate the type if supplied.
|
||
|
|
if "seconds" in req:
|
||
|
|
seconds = req.get("seconds")
|
||
|
|
if not isinstance(seconds, int) or isinstance(seconds, bool):
|
||
|
|
return False, "seconds must be an int"
|
||
|
|
return True, None
|
||
|
|
|
||
|
|
# TOK-14 / D5-05: 7th message type `session_open`.
|
||
|
|
# Both session_id and ts are OPTIONAL; when supplied, they must be strings.
|
||
|
|
# Absence is tolerated so the TS wrapper can emit a bare ping on MCP boot
|
||
|
|
# without stalling on id/ts bookkeeping.
|
||
|
|
if req_type == "session_open":
|
||
|
|
if "session_id" in req and not isinstance(req["session_id"], str):
|
||
|
|
return False, "session_id must be a string"
|
||
|
|
if "ts" in req and not isinstance(req["ts"], str):
|
||
|
|
return False, "ts must be a string"
|
||
|
|
return True, None
|
||
|
|
|
||
|
|
# Unknown types are not rejected at validation time; the dispatcher
|
||
|
|
# returns a structured unknown_message_type response so the caller sees
|
||
|
|
# a different reason code from "invalid_message".
|
||
|
|
return True, None
|
||
|
|
|
||
|
|
|
||
|
|
async def _dispatch_socket_request(
    req: dict,
    store: Any,
    lock: ProcessLock,
    state: dict,
) -> dict:
    """Default dispatcher for NDJSON socket requests.

    Handles seven message types; mutates `state` in-place and persists via
    `save_state` when the message changes scheduler control flags. The
    dispatcher NEVER transitions the FSM directly
    (C-DISPATCHER-FSM-ISOLATION; renamed from bare `C2` per D7-16) --
    it only sets pending flags that `_tick_body` reads under the FSM lock.

    Handled types:
    - status               -> state snapshot including version
    - user_initiated_sleep -> set user_sleep_request pending flag
    - force_wake           -> set force_wake_request pending flag
    - force_rem            -> set force_rem_request pending flag
    - pause                -> scheduler_paused=True
    - resume               -> scheduler_paused=False
    - session_open         -> set first_turn_pending + hippea_cascade_request
                              (Plan 05-04 TOK-14 / D5-05)
    - any other            -> {"ok": False, "reason": "unknown_message_type"}

    Args:
        req: Parsed request; anything that is not a dict is rejected.
        store: Not used by this dispatcher.
        lock: Not used by this dispatcher.
        state: Daemon state dict, mutated in place.

    Returns:
        A response dict. Every non-status failure carries "ok": False and a
        "reason" code.
    """
    # Reject non-dict requests (defence-in-depth; caller already json.loaded).
    if not isinstance(req, dict):
        return {
            "ok": False,
            "reason": "invalid_message",
            "error": "request must be a JSON object",
        }

    # Per-type schema validation (ASVS V5).
    ok, err = _validate_socket_message(req)
    if not ok:
        return {
            "ok": False,
            "reason": "invalid_message",
            "error": err or "schema_validation_failed",
        }

    req_type = req.get("type")

    # Lazy imports so test monkeypatches of STATE_PATH (via daemon_state) and
    # __version__ (via iai_mcp) always resolve to the current module state.
    from datetime import datetime, timezone

    from iai_mcp import __version__ as pkg_version
    from iai_mcp.daemon_state import save_state

    def _persist(success: dict) -> dict:
        """Save `state`; return `success`, or a state_write_failed response.

        NOTE(review): `state` has already been mutated in memory when this
        runs, and the mutation is not rolled back if the write fails.
        """
        try:
            save_state(state)
        except Exception as exc:  # noqa: BLE001 -- socket must never crash daemon
            return {
                "ok": False,
                "reason": "state_write_failed",
                "error": str(exc)[:200],
            }
        return success

    # -------------------------------------------------------- status snapshot
    if req_type == "status":
        return _status_snapshot(state, pkg_version)

    # -------------------------------------------------- user_initiated_sleep
    if req_type == "user_initiated_sleep":
        if state.get("fsm_state", "WAKE") in ("SLEEP", "DREAMING", "TRANSITIONING"):
            return {"ok": False, "reason": "already_sleeping"}

        state["user_sleep_request"] = {
            # Clip reason to 500 chars (ASVS V5 output hardening mirror).
            "reason": str(req.get("reason", ""))[:500],
            "ts": str(req.get("ts", "")),
            "pending": True,
        }
        # Tell the caller we queued the transition; the scheduler owns the FSM
        # and will move WAKE->TRANSITIONING->SLEEP on the next tick
        # (C-DISPATCHER-FSM-ISOLATION; renamed from bare `C2` per D7-16).
        return _persist({"ok": True, "state": "TRANSITIONING"})

    # ------------------------------------------------- force_wake / force_rem
    # Both only raise a pending flag; they differ in the state key that is
    # set and in the reason code returned.
    forced = {
        "force_wake": ("force_wake_request", "wake_queued"),
        "force_rem": ("force_rem_request", "rem_queued"),
    }
    if req_type in forced:
        flag_key, queued_reason = forced[req_type]
        state[flag_key] = {"ts": str(req.get("ts", "")), "pending": True}
        return _persist({"ok": True, "reason": queued_reason})

    # --------------------------------------------------------- pause/resume
    if req_type in ("pause", "resume"):
        paused = req_type == "pause"
        state["scheduler_paused"] = paused
        return _persist({"ok": True, "paused": paused})

    # ---------------------------------------------------------- session_open
    # TOK-14 / D5-05: 7th message type. Sets two flags:
    #   - first_turn_pending[session_id] = <UTC ISO timestamp> -> consumed by
    #     core's _first_turn_recall_hook exactly once per session.
    #   - hippea_cascade_request {pending=True, session_id, ts} -> polled by
    #     daemon._hippea_cascade_loop which pre-warms the LRU with records
    #     from the top-K salient communities (Van de Cruys HIPPEA operational
    #     form).
    # Both flags are idempotent under a re-emit: set_overwrite is intentional
    # so a client that retries session_open gets a fresh cascade.
    if req_type == "session_open":
        # Clip session_id to 128 chars (ASVS V5 output hardening -- matches
        # user_initiated_sleep.reason clip at 500). An omitted session_id
        # becomes the empty string.
        session_id = str(req.get("session_id", ""))[:128]
        ts = str(req.get("ts", ""))
        state["last_session_open"] = {"session_id": session_id, "ts": ts}

        # first-turn hook flag. Co-exists with the existing dict form
        # written by daemon_state.mark_session_opened.
        first_turn = state.setdefault("first_turn_pending", {})
        now_iso = datetime.now(timezone.utc).isoformat()
        if isinstance(first_turn, dict):
            first_turn[session_id] = now_iso
        else:
            # Legacy scalar-bool state -> upgrade in place to the dict form.
            state["first_turn_pending"] = {session_id: now_iso}

        # cascade flag.
        state["hippea_cascade_request"] = {
            "session_id": session_id,
            "ts": ts,
            "pending": True,
        }
        return _persist({"ok": True, "reason": "session_open_queued"})

    # ------------------------------------------------------------ unknown
    return {
        "ok": False,
        "reason": "unknown_message_type",
        "type": req_type,
    }


def _status_snapshot(state: dict, pkg_version: str) -> dict:
    """Build the `status` response from the in-memory daemon state.

    Read-only: `state` is not modified and nothing is persisted.
    """
    from datetime import datetime, timezone

    fsm_state = state.get("fsm_state", "WAKE")
    started_at = state.get("daemon_started_at")

    uptime_sec: float | None = None
    if started_at:
        try:
            start_dt = datetime.fromisoformat(started_at)
            uptime_sec = (datetime.now(timezone.utc) - start_dt).total_seconds()
        except (TypeError, ValueError):
            # Unparseable value, non-string value, or a naive timestamp
            # (naive - aware subtraction raises TypeError): report no uptime.
            uptime_sec = None

    # Truncate pending_digest to the top-level counters for socket
    # transport; the full digest can be multi-KB once insights are baked.
    digest = state.get("pending_digest")
    if isinstance(digest, dict):
        counter_defaults = (
            ("rem_cycles_completed", 0),
            ("episodes_processed", 0),
            ("schemas_induced_tier0", 0),
            ("claude_call_used", False),
        )
        truncated_digest = {
            key: digest.get(key, default) for key, default in counter_defaults
        }
    else:
        truncated_digest = None

    return {
        "ok": True,
        # Backwards-compat key used by tests/test_concurrency.py Test 6.
        "state": fsm_state,
        "uptime_sec": uptime_sec,
        # Plan 04-gap-1 additions:
        "version": pkg_version,
        "fsm_state": fsm_state,
        "last_tick_at": state.get("last_tick_at"),
        "quiet_window": state.get("quiet_window"),
        "pending_digest": truncated_digest,
        "daemon_started_at": started_at,
        "scheduler_paused": bool(state.get("scheduler_paused", False)),
    }
async def serve_control_socket(
    store: Any,
    lock: ProcessLock,
    state: dict,
    shutdown: asyncio.Event,
    *,
    dispatcher: Callable[[dict], Awaitable[dict]] | None = None,
    socket_path: Path = SOCKET_PATH,
) -> None:
    """Unix socket NDJSON server at ~/.iai-mcp/.daemon.sock.

    Protocol: one JSON request line is read per connection and answered with
    one JSON response line, after which the connection is closed. The
    cleanup_socket kwarg (Python 3.13+) auto-removes the socket file on
    server shutdown; on 3.12 we emulate in the finally-block.
    Stale-socket pre-cleanup protects against SIGKILL-orphaned files.

    Permissions: chmod 0o600 immediately after bind so cross-user access
    requires privilege escalation (T-04-04 accepted risk).
    NOTE(review): the inline comment at the chmod cites T-04-07 while this
    paragraph cites T-04-04 -- confirm which threat ID is intended.

    When dispatcher is provided it receives only the parsed request dict and
    must return a dict. When None, the default _dispatch_socket_request is used.

    Args:
        store: Forwarded to the default dispatcher.
        lock: Forwarded to the default dispatcher.
        state: Daemon state dict, forwarded to the default dispatcher.
        shutdown: The server runs until this event is set.
        dispatcher: Optional replacement for the default dispatcher.
        socket_path: Filesystem path to bind.
    """
    cleanup_stale_socket(socket_path)
    # Ensure parent dir exists (Path.home() / .iai-mcp could be first-run).
    socket_path.parent.mkdir(parents=True, exist_ok=True)

    # Python 3.13 added a `cleanup_socket` kwarg to the event-loop unix server
    # that auto-removes the socket file on shutdown. On 3.12 we emulate the
    # same behaviour by unlinking in the finally-block below. See:
    # https://docs.python.org/3.13/library/asyncio-stream.html
    #
    # Probe the loop we are actually running on. Creating a throwaway loop
    # just to read a signature would leak its selector and self-pipe fds
    # (it was never closed) and could inspect a different loop
    # implementation than the one that will bind the socket.
    try:
        create_unix_server = asyncio.get_running_loop().create_unix_server
        supports_cleanup_socket = (
            "cleanup_socket" in inspect.signature(create_unix_server).parameters
        )
    except Exception:  # noqa: BLE001 -- probe is best effort; fall back to emulation
        supports_cleanup_socket = False

    async def handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
        """Serve one request/response exchange, then close the connection."""
        try:
            line = await reader.readline()
            if not line:
                # Client connected and hung up without sending anything.
                return
            try:
                req = json.loads(line)
            except (TypeError, ValueError) as exc:
                writer.write((json.dumps({"error": f"invalid_json: {exc}"}) + "\n").encode("utf-8"))
                await writer.drain()
                return
            try:
                if dispatcher is not None:
                    resp = await dispatcher(req)
                else:
                    resp = await _dispatch_socket_request(req, store, lock, state)
            except Exception as exc:  # noqa: BLE001 -- socket must never crash daemon
                resp = {"error": str(exc)}
            writer.write((json.dumps(resp) + "\n").encode("utf-8"))
            await writer.drain()
        finally:
            try:
                writer.close()
                await writer.wait_closed()
            except Exception:  # noqa: BLE001 -- peer may already be gone
                pass

    # Build server kwargs. The native 3.13+ behaviour is opted in via
    # `cleanup_socket=True`; on 3.12 the finally-block emulates the same unlink
    # so a subsequent daemon boot cannot hit EADDRINUSE.
    server_kwargs = {"cleanup_socket": True} if supports_cleanup_socket else {}
    server = await asyncio.start_unix_server(
        handle, path=str(socket_path), **server_kwargs,
    )
    # chmod 0o600 immediately after bind (T-04-07 mitigation).
    try:
        os.chmod(str(socket_path), 0o600)
    except OSError:
        pass

    try:
        async with server:
            await shutdown.wait()
    finally:
        # Python 3.12 cleanup-socket emulation: remove the socket file on
        # shutdown so the next daemon boot doesn't hit EADDRINUSE. 3.13+ does
        # this natively inside the server.__aexit__.
        if not supports_cleanup_socket:
            try:
                socket_path.unlink()
            except OSError:
                # Includes FileNotFoundError; removal is best effort.
                pass