mirror of
https://github.com/trustgraph-ai/trustgraph.git
synced 2026-04-25 16:36:21 +02:00
190 lines
6.9 KiB
Python
190 lines
6.9 KiB
Python
|
|
"""
|
||
|
|
Regression tests for Subscriber.start() readiness barrier.
|
||
|
|
|
||
|
|
Background: prior to the eager-connect fix, Subscriber.start() created
|
||
|
|
the run() task and returned immediately. The underlying backend consumer
|
||
|
|
was lazily connected on its first receive() call, which left a setup
|
||
|
|
race for request/response clients using ephemeral per-subscriber response
|
||
|
|
queues (RabbitMQ auto-delete exclusive queues): the request would be
|
||
|
|
published before the response queue was bound, and the broker would
|
||
|
|
silently drop the reply. fetch_config(), document-embeddings, and
|
||
|
|
api-gateway all hit this with "Failed to fetch config on notify" /
|
||
|
|
"Request timeout exception" symptoms.
|
||
|
|
|
||
|
|
These tests pin the readiness contract:
|
||
|
|
|
||
|
|
await subscriber.start()
|
||
|
|
# at this point, consumer.ensure_connected() MUST have run
|
||
|
|
|
||
|
|
so that any future change which removes the eager bind, or moves it
|
||
|
|
back to lazy initialisation, fails CI loudly.
|
||
|
|
"""
|
||
|
|
|
||
|
|
import asyncio
|
||
|
|
|
||
|
|
import pytest
|
||
|
|
from unittest.mock import MagicMock
|
||
|
|
|
||
|
|
from trustgraph.base.subscriber import Subscriber
|
||
|
|
|
||
|
|
|
||
|
|
def _make_backend(ensure_connected_side_effect=None,
|
||
|
|
receive_side_effect=None):
|
||
|
|
"""Build a fake backend whose consumer records ensure_connected /
|
||
|
|
receive calls. ensure_connected_side_effect lets a test inject a
|
||
|
|
delay or exception."""
|
||
|
|
backend = MagicMock()
|
||
|
|
consumer = MagicMock()
|
||
|
|
|
||
|
|
consumer.ensure_connected = MagicMock(
|
||
|
|
side_effect=ensure_connected_side_effect,
|
||
|
|
)
|
||
|
|
|
||
|
|
# By default receive raises a timeout-style exception that the
|
||
|
|
# subscriber loop is supposed to swallow as a "no message yet" — this
|
||
|
|
# keeps the subscriber idling cleanly while the test inspects state.
|
||
|
|
if receive_side_effect is None:
|
||
|
|
receive_side_effect = TimeoutError("No message received within timeout")
|
||
|
|
consumer.receive = MagicMock(side_effect=receive_side_effect)
|
||
|
|
|
||
|
|
consumer.acknowledge = MagicMock()
|
||
|
|
consumer.negative_acknowledge = MagicMock()
|
||
|
|
consumer.pause_message_listener = MagicMock()
|
||
|
|
consumer.unsubscribe = MagicMock()
|
||
|
|
consumer.close = MagicMock()
|
||
|
|
|
||
|
|
backend.create_consumer.return_value = consumer
|
||
|
|
return backend, consumer
|
||
|
|
|
||
|
|
|
||
|
|
def _make_subscriber(backend):
|
||
|
|
return Subscriber(
|
||
|
|
backend=backend,
|
||
|
|
topic="response:tg:config",
|
||
|
|
subscription="test-sub",
|
||
|
|
consumer_name="test-consumer",
|
||
|
|
schema=dict,
|
||
|
|
max_size=10,
|
||
|
|
drain_timeout=1.0,
|
||
|
|
backpressure_strategy="block",
|
||
|
|
)
|
||
|
|
|
||
|
|
|
||
|
|
class TestSubscriberReadiness:
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
|
||
|
|
async def test_start_calls_ensure_connected_before_returning(self):
|
||
|
|
"""The barrier: ensure_connected must have been invoked at least
|
||
|
|
once by the time start() returns."""
|
||
|
|
backend, consumer = _make_backend()
|
||
|
|
subscriber = _make_subscriber(backend)
|
||
|
|
|
||
|
|
await subscriber.start()
|
||
|
|
|
||
|
|
try:
|
||
|
|
consumer.ensure_connected.assert_called_once()
|
||
|
|
finally:
|
||
|
|
await subscriber.stop()
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
|
||
|
|
async def test_start_blocks_until_ensure_connected_completes(self):
|
||
|
|
"""If ensure_connected is slow, start() must wait for it. This is
|
||
|
|
the actual race-condition guard — it would have failed against
|
||
|
|
the buggy version where start() returned before run() had even
|
||
|
|
scheduled the consumer creation."""
|
||
|
|
connect_started = asyncio.Event()
|
||
|
|
release_connect = asyncio.Event()
|
||
|
|
|
||
|
|
# ensure_connected runs in the executor thread, so we need a
|
||
|
|
# threading-safe gate. Use a simple busy-wait on a flag set by
|
||
|
|
# the asyncio side via call_soon_threadsafe — but the simpler
|
||
|
|
# path is to give it a sleep and observe ordering.
|
||
|
|
import threading
|
||
|
|
gate = threading.Event()
|
||
|
|
|
||
|
|
def slow_connect():
|
||
|
|
connect_started.set() # safe: only mutates the Event flag
|
||
|
|
gate.wait(timeout=2.0)
|
||
|
|
|
||
|
|
backend, consumer = _make_backend(
|
||
|
|
ensure_connected_side_effect=slow_connect,
|
||
|
|
)
|
||
|
|
subscriber = _make_subscriber(backend)
|
||
|
|
|
||
|
|
start_task = asyncio.create_task(subscriber.start())
|
||
|
|
|
||
|
|
# Wait until ensure_connected has begun executing.
|
||
|
|
await asyncio.wait_for(connect_started.wait(), timeout=2.0)
|
||
|
|
|
||
|
|
# ensure_connected is in flight — start() must NOT have returned.
|
||
|
|
assert not start_task.done(), (
|
||
|
|
"start() returned before ensure_connected() completed — "
|
||
|
|
"the readiness barrier is broken and the request/response "
|
||
|
|
"race condition is back."
|
||
|
|
)
|
||
|
|
|
||
|
|
# Release the gate; start() should now complete promptly.
|
||
|
|
gate.set()
|
||
|
|
await asyncio.wait_for(start_task, timeout=2.0)
|
||
|
|
|
||
|
|
consumer.ensure_connected.assert_called_once()
|
||
|
|
|
||
|
|
await subscriber.stop()
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
|
||
|
|
async def test_start_propagates_consumer_creation_failure(self):
|
||
|
|
"""If create_consumer() raises, start() must surface the error
|
||
|
|
rather than hang on the readiness future. The old code path
|
||
|
|
retried indefinitely inside run() and never let start() unblock."""
|
||
|
|
backend = MagicMock()
|
||
|
|
backend.create_consumer.side_effect = RuntimeError("broker down")
|
||
|
|
|
||
|
|
subscriber = _make_subscriber(backend)
|
||
|
|
|
||
|
|
with pytest.raises(RuntimeError, match="broker down"):
|
||
|
|
await asyncio.wait_for(subscriber.start(), timeout=2.0)
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
|
||
|
|
async def test_start_propagates_ensure_connected_failure(self):
|
||
|
|
"""Same contract for an ensure_connected() that raises (e.g. the
|
||
|
|
broker is up but the queue declare/bind fails)."""
|
||
|
|
backend, consumer = _make_backend(
|
||
|
|
ensure_connected_side_effect=RuntimeError("queue declare failed"),
|
||
|
|
)
|
||
|
|
subscriber = _make_subscriber(backend)
|
||
|
|
|
||
|
|
with pytest.raises(RuntimeError, match="queue declare failed"):
|
||
|
|
await asyncio.wait_for(subscriber.start(), timeout=2.0)
|
||
|
|
|
||
|
|
@pytest.mark.asyncio
|
||
|
|
async def test_ensure_connected_runs_before_subscriber_running_log(self):
|
||
|
|
"""Subtle ordering: ensure_connected MUST happen before the
|
||
|
|
receive loop, so that any reply is captured. We assert this by
|
||
|
|
checking ensure_connected was called before any receive call."""
|
||
|
|
call_order = []
|
||
|
|
|
||
|
|
def record_ensure():
|
||
|
|
call_order.append("ensure_connected")
|
||
|
|
|
||
|
|
def record_receive(*args, **kwargs):
|
||
|
|
call_order.append("receive")
|
||
|
|
raise TimeoutError("No message received within timeout")
|
||
|
|
|
||
|
|
backend, consumer = _make_backend(
|
||
|
|
ensure_connected_side_effect=record_ensure,
|
||
|
|
receive_side_effect=record_receive,
|
||
|
|
)
|
||
|
|
subscriber = _make_subscriber(backend)
|
||
|
|
|
||
|
|
await subscriber.start()
|
||
|
|
|
||
|
|
# Give the receive loop a tick to run at least once.
|
||
|
|
await asyncio.sleep(0.05)
|
||
|
|
|
||
|
|
await subscriber.stop()
|
||
|
|
|
||
|
|
# ensure_connected must come first; receive may not have happened
|
||
|
|
# yet on a fast machine, but if it did, it must come after.
|
||
|
|
assert call_order, "neither ensure_connected nor receive was called"
|
||
|
|
assert call_order[0] == "ensure_connected"
|