Co-Authored-By: Claude <noreply@anthropic.com> Co-Authored-By: XNLLLLH <XNLLLLH@users.noreply.github.com>
233 lines
8.1 KiB
Python
233 lines
8.1 KiB
Python
"""Phase 10.1 -- typed schema + atomic load/save for lifecycle_state.json.
|
|
|
|
The 4-state lifecycle (WAKE / DROWSY / SLEEP / HIBERNATION) needs a single
|
|
source of truth on disk. Per LOCKED contract L2 (panel verdict R2), the
|
|
daemon is the ONLY writer of `~/.iai-mcp/lifecycle_state.json`; wrappers
|
|
signal events via Unix socket OR atomic-write `~/.iai-mcp/wake.signal`
|
|
filesystem marker.
|
|
|
|
Persistence pattern mirrors `daemon_state.py` (Phase 04-01) and
|
|
`maintenance.py` (Phase 07.11-03):
|
|
- Writes via `tempfile.mkstemp` + `os.replace` (POSIX atomic rename).
|
|
- Crash mid-write leaves the prior file intact; readers either see
|
|
the old complete blob or the new complete blob, never partial bytes.
|
|
- File mode 0o600 (user-only, matches T-04-07 mitigation).
|
|
|
|
Schema mirrors lifecycle_state.json spec.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
from datetime import datetime, timezone
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import TypedDict
|
|
|
|
# Default location. Overridable for tests via the `path` arg of load/save.
|
|
LIFECYCLE_STATE_PATH: Path = Path.home() / ".iai-mcp" / "lifecycle_state.json"
|
|
|
|
|
|
class LifecycleState(str, Enum):
|
|
"""Four lifecycle states."""
|
|
|
|
WAKE = "WAKE"
|
|
DROWSY = "DROWSY"
|
|
SLEEP = "SLEEP"
|
|
HIBERNATION = "HIBERNATION"
|
|
|
|
|
|
class SleepCycleProgress(TypedDict, total=False):
|
|
"""Per-attempt progress of the multi-step sleep pipeline.
|
|
|
|
All fields optional so the dict can be partially populated mid-cycle;
|
|
`last_completed_step=0` and `attempt=1` represent a freshly-started cycle.
|
|
"""
|
|
|
|
last_completed_step: int
|
|
attempt: int
|
|
last_error: str | None
|
|
started_at: str # ISO-8601 UTC
|
|
|
|
|
|
class Quarantine(TypedDict):
|
|
"""A failing sleep step can quarantine the cycle until `until_ts`."""
|
|
|
|
until_ts: str # ISO-8601 UTC
|
|
reason: str
|
|
since_ts: str # ISO-8601 UTC
|
|
|
|
|
|
class LifecycleStateRecord(TypedDict):
|
|
"""On-disk schema for `lifecycle_state.json`.
|
|
|
|
`sleep_cycle_progress` and `quarantine` are nullable; the rest are
|
|
always present in a well-formed record. `shadow_run` toggles whether
|
|
the state machine actually executes process termination on
|
|
HIBERNATION (False post-Phase 10.6) or merely logs the would-action.
|
|
"""
|
|
|
|
current_state: str # one of LifecycleState values
|
|
since_ts: str # ISO-8601 UTC
|
|
last_activity_ts: str # ISO-8601 UTC
|
|
wrapper_event_seq: int
|
|
sleep_cycle_progress: SleepCycleProgress | None
|
|
quarantine: Quarantine | None
|
|
shadow_run: bool
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _utc_now_iso() -> str:
|
|
"""Return ISO-8601 UTC timestamp with explicit `+00:00` suffix.
|
|
|
|
`isoformat()` on a UTC-aware datetime emits `+00:00` rather than `Z`.
|
|
Both forms are valid ISO-8601; downstream readers (CLI status, event
|
|
log, Hypothesis tests) parse via `datetime.fromisoformat` which
|
|
accepts the offset form.
|
|
"""
|
|
return datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
def default_state() -> LifecycleStateRecord:
|
|
"""Return a fresh WAKE record with shadow_run=False (Phase 10.6 default).
|
|
|
|
Used by `load_state` when the file is absent or malformed (self-heal),
|
|
and by tests / callers that need a known starting point.
|
|
|
|
Plan 10.6-01 Task 1.6 flipped the default from True to False:
|
|
HIBERNATION transitions now actually exit the daemon process via the
|
|
global shutdown event in `daemon.main()`. The legacy RSS-watchdog has
|
|
been removed in Task 1.4; the lifecycle state machine owns shutdown
|
|
authority.
|
|
"""
|
|
now = _utc_now_iso()
|
|
return {
|
|
"current_state": LifecycleState.WAKE.value,
|
|
"since_ts": now,
|
|
"last_activity_ts": now,
|
|
"wrapper_event_seq": 0,
|
|
"sleep_cycle_progress": None,
|
|
"quarantine": None,
|
|
"shadow_run": False,
|
|
}
|
|
|
|
|
|
def _validate_record(raw: object) -> LifecycleStateRecord:
|
|
"""Reject malformed JSON; return a typed copy on success.
|
|
|
|
A minimal schema check — enough to catch hand-edited corruption and
|
|
out-of-band writes from a stale schema version, without pulling in
|
|
pydantic for runtime validation. Reads stay zero-allocation past the
|
|
JSON parse step.
|
|
"""
|
|
if not isinstance(raw, dict):
|
|
raise ValueError(
|
|
f"lifecycle_state record must be a JSON object, got {type(raw).__name__}"
|
|
)
|
|
|
|
required_str_keys = ("current_state", "since_ts", "last_activity_ts")
|
|
for k in required_str_keys:
|
|
v = raw.get(k)
|
|
if not isinstance(v, str) or not v:
|
|
raise ValueError(f"lifecycle_state.{k} must be a non-empty string, got {v!r}")
|
|
|
|
state_value = raw["current_state"]
|
|
if state_value not in {s.value for s in LifecycleState}:
|
|
raise ValueError(
|
|
f"lifecycle_state.current_state {state_value!r} is not a valid LifecycleState"
|
|
)
|
|
|
|
seq = raw.get("wrapper_event_seq")
|
|
if not isinstance(seq, int) or seq < 0:
|
|
raise ValueError(
|
|
f"lifecycle_state.wrapper_event_seq must be a non-negative int, got {seq!r}"
|
|
)
|
|
|
|
shadow = raw.get("shadow_run")
|
|
if not isinstance(shadow, bool):
|
|
raise ValueError(
|
|
f"lifecycle_state.shadow_run must be a bool, got {shadow!r}"
|
|
)
|
|
|
|
progress = raw.get("sleep_cycle_progress")
|
|
if progress is not None and not isinstance(progress, dict):
|
|
raise ValueError(
|
|
f"lifecycle_state.sleep_cycle_progress must be dict or null, got {progress!r}"
|
|
)
|
|
|
|
quarantine = raw.get("quarantine")
|
|
if quarantine is not None:
|
|
if not isinstance(quarantine, dict):
|
|
raise ValueError(
|
|
f"lifecycle_state.quarantine must be dict or null, got {quarantine!r}"
|
|
)
|
|
for k in ("until_ts", "reason", "since_ts"):
|
|
if not isinstance(quarantine.get(k), str):
|
|
raise ValueError(
|
|
f"lifecycle_state.quarantine.{k} must be string"
|
|
)
|
|
|
|
# Cast is safe after the checks above; mypy/pylance accept the dict.
|
|
return raw # type: ignore[return-value]
|
|
|
|
|
|
def load_state(path: Path | None = None) -> LifecycleStateRecord:
|
|
"""Read `lifecycle_state.json`; return `default_state()` if absent.
|
|
|
|
On JSON-decode error or schema-validation error: also returns a
|
|
fresh default state. The legacy file is left in place (no auto-delete)
|
|
so an operator can inspect it; `save_state` will overwrite it on the
|
|
next persist.
|
|
"""
|
|
target = path if path is not None else LIFECYCLE_STATE_PATH
|
|
if not target.exists():
|
|
return default_state()
|
|
try:
|
|
raw = json.loads(target.read_text())
|
|
except (OSError, json.JSONDecodeError):
|
|
return default_state()
|
|
try:
|
|
return _validate_record(raw)
|
|
except ValueError:
|
|
return default_state()
|
|
|
|
|
|
def save_state(record: LifecycleStateRecord, path: Path | None = None) -> None:
|
|
"""Atomically persist `record` via tempfile + os.replace.
|
|
|
|
Mirrors `daemon_state.save_state` (Phase 04-01) bullet-for-bullet:
|
|
creates parent dir if missing; writes to a sibling temp file in the
|
|
same directory (required so os.replace is an atomic same-filesystem
|
|
rename); fsyncs the file contents before rename so the data is on
|
|
disk; chmods 0o600 before the swap so the visible file is never
|
|
world-readable; on exception unlinks the temp file so /tmp does not
|
|
accumulate.
|
|
"""
|
|
target = path if path is not None else LIFECYCLE_STATE_PATH
|
|
# Validate before writing so callers get an early ValueError on
|
|
# malformed records rather than persisting garbage to disk.
|
|
_validate_record(record)
|
|
|
|
target.parent.mkdir(parents=True, exist_ok=True)
|
|
fd, tmp = tempfile.mkstemp(
|
|
prefix=".lifecycle_state.",
|
|
suffix=".tmp",
|
|
dir=str(target.parent),
|
|
)
|
|
try:
|
|
with os.fdopen(fd, "w") as f:
|
|
json.dump(record, f, indent=2)
|
|
f.flush()
|
|
os.fsync(f.fileno())
|
|
os.chmod(tmp, 0o600)
|
|
os.replace(tmp, target)
|
|
except Exception:
|
|
try:
|
|
os.unlink(tmp)
|
|
except OSError:
|
|
pass
|
|
raise
|