mirror of
https://github.com/MODSetter/SurfSense.git
synced 2026-05-21 18:55:16 +02:00
chore: linting
This commit is contained in:
parent
219a5977b7
commit
c187b04e82
25 changed files with 102 additions and 108 deletions
|
|
@ -28,7 +28,9 @@ def mcp_signature(mcp_tools_by_agent: dict[str, list[BaseTool]]) -> str:
|
||||||
"""Hash the per-agent MCP tool surface so a change rotates the cache key."""
|
"""Hash the per-agent MCP tool surface so a change rotates the cache key."""
|
||||||
rows = []
|
rows = []
|
||||||
for agent_name in sorted(mcp_tools_by_agent.keys()):
|
for agent_name in sorted(mcp_tools_by_agent.keys()):
|
||||||
names = sorted(getattr(t, "name", "") or "" for t in mcp_tools_by_agent[agent_name])
|
names = sorted(
|
||||||
|
getattr(t, "name", "") or "" for t in mcp_tools_by_agent[agent_name]
|
||||||
|
)
|
||||||
rows.append((agent_name, names))
|
rows.append((agent_name, names))
|
||||||
return stable_hash(rows)
|
return stable_hash(rows)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,9 @@ RULESET = Ruleset(
|
||||||
Rule(permission="getVisibleJiraProjects", pattern="*", action="allow"),
|
Rule(permission="getVisibleJiraProjects", pattern="*", action="allow"),
|
||||||
Rule(permission="searchJiraIssuesUsingJql", pattern="*", action="allow"),
|
Rule(permission="searchJiraIssuesUsingJql", pattern="*", action="allow"),
|
||||||
Rule(permission="getJiraIssue", pattern="*", action="allow"),
|
Rule(permission="getJiraIssue", pattern="*", action="allow"),
|
||||||
Rule(permission="getJiraProjectIssueTypesMetadata", pattern="*", action="allow"),
|
Rule(
|
||||||
|
permission="getJiraProjectIssueTypesMetadata", pattern="*", action="allow"
|
||||||
|
),
|
||||||
Rule(permission="getJiraIssueTypeMetaWithFields", pattern="*", action="allow"),
|
Rule(permission="getJiraIssueTypeMetaWithFields", pattern="*", action="allow"),
|
||||||
Rule(permission="getTransitionsForJiraIssue", pattern="*", action="allow"),
|
Rule(permission="getTransitionsForJiraIssue", pattern="*", action="allow"),
|
||||||
Rule(permission="lookupJiraAccountId", pattern="*", action="allow"),
|
Rule(permission="lookupJiraAccountId", pattern="*", action="allow"),
|
||||||
|
|
|
||||||
|
|
@ -181,9 +181,7 @@ class EtlPipelineService:
|
||||||
# Common case: the configured ETL service can't OCR
|
# Common case: the configured ETL service can't OCR
|
||||||
# this image format (or no service is configured at
|
# this image format (or no service is configured at
|
||||||
# all). Don't spam warnings -- just no OCR for it.
|
# all). Don't spam warnings -- just no OCR for it.
|
||||||
logging.debug(
|
logging.debug("Skipping per-image OCR for %s: %s", image_name, exc)
|
||||||
"Skipping per-image OCR for %s: %s", image_name, exc
|
|
||||||
)
|
|
||||||
return ""
|
return ""
|
||||||
return ocr_result.markdown_content
|
return ocr_result.markdown_content
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -96,9 +96,7 @@ async def parse_with_vision_llm(file_path: str, filename: str, llm) -> str:
|
||||||
return await _invoke_vision(llm, _PROMPT, data_url, filename)
|
return await _invoke_vision(llm, _PROMPT, data_url, filename)
|
||||||
|
|
||||||
|
|
||||||
async def parse_image_for_description(
|
async def parse_image_for_description(file_path: str, filename: str, llm) -> str:
|
||||||
file_path: str, filename: str, llm
|
|
||||||
) -> str:
|
|
||||||
"""Visual-description-only call for per-image-in-PDF use.
|
"""Visual-description-only call for per-image-in-PDF use.
|
||||||
|
|
||||||
Used by ``picture_describer`` when an image is embedded inside a
|
Used by ``picture_describer`` when an image is embedded inside a
|
||||||
|
|
|
||||||
|
|
@ -583,9 +583,7 @@ def inject_descriptions_inline(
|
||||||
)
|
)
|
||||||
if desc_idx > before_idx:
|
if desc_idx > before_idx:
|
||||||
continue
|
continue
|
||||||
out, desc_idx = _replace_one_match(
|
out, desc_idx = _replace_one_match(out, _CAPTION_ONLY, descriptions, desc_idx)
|
||||||
out, _CAPTION_ONLY, descriptions, desc_idx
|
|
||||||
)
|
|
||||||
if desc_idx > before_idx:
|
if desc_idx > before_idx:
|
||||||
continue
|
continue
|
||||||
# No more positions to splice into.
|
# No more positions to splice into.
|
||||||
|
|
@ -612,9 +610,7 @@ def render_appended_section(
|
||||||
|
|
||||||
parts: list[str] = ["", heading, ""]
|
parts: list[str] = ["", heading, ""]
|
||||||
for desc in descriptions:
|
for desc in descriptions:
|
||||||
parts.append(
|
parts.append(_format_image_block(desc.name, desc.description, desc.ocr_text))
|
||||||
_format_image_block(desc.name, desc.description, desc.ocr_text)
|
|
||||||
)
|
|
||||||
parts.append("")
|
parts.append("")
|
||||||
|
|
||||||
if skip_notes is not None:
|
if skip_notes is not None:
|
||||||
|
|
|
||||||
|
|
@ -72,9 +72,7 @@ async def add_user_trust(
|
||||||
session, user_id=user_id, connector_id=connector_id
|
session, user_id=user_id, connector_id=connector_id
|
||||||
)
|
)
|
||||||
if connector is None:
|
if connector is None:
|
||||||
raise LookupError(
|
raise LookupError(f"connector {connector_id} not found for user {user_id}")
|
||||||
f"connector {connector_id} not found for user {user_id}"
|
|
||||||
)
|
|
||||||
|
|
||||||
trusted = _read_trusted(connector)
|
trusted = _read_trusted(connector)
|
||||||
if tool_name not in trusted:
|
if tool_name not in trusted:
|
||||||
|
|
@ -96,9 +94,7 @@ async def remove_user_trust(
|
||||||
session, user_id=user_id, connector_id=connector_id
|
session, user_id=user_id, connector_id=connector_id
|
||||||
)
|
)
|
||||||
if connector is None:
|
if connector is None:
|
||||||
raise LookupError(
|
raise LookupError(f"connector {connector_id} not found for user {user_id}")
|
||||||
f"connector {connector_id} not found for user {user_id}"
|
|
||||||
)
|
|
||||||
|
|
||||||
trusted = _read_trusted(connector)
|
trusted = _read_trusted(connector)
|
||||||
if tool_name in trusted:
|
if tool_name in trusted:
|
||||||
|
|
|
||||||
|
|
@ -362,9 +362,7 @@ async def test_full_resume_routing_glue_for_two_paused_subagents():
|
||||||
SimpleNamespace(
|
SimpleNamespace(
|
||||||
id="i-approver",
|
id="i-approver",
|
||||||
value={
|
value={
|
||||||
"action_requests": [
|
"action_requests": [{"name": "approve", "args": {}, "description": ""}],
|
||||||
{"name": "approve", "args": {}, "description": ""}
|
|
||||||
],
|
|
||||||
"review_configs": [{}],
|
"review_configs": [{}],
|
||||||
"tool_call_id": "tcid-approver",
|
"tool_call_id": "tcid-approver",
|
||||||
},
|
},
|
||||||
|
|
@ -403,8 +401,6 @@ async def test_full_resume_routing_glue_for_two_paused_subagents():
|
||||||
|
|
||||||
received_a = ast.literal_eval(result_a.update["decision_text"])
|
received_a = ast.literal_eval(result_a.update["decision_text"])
|
||||||
assert received_a == {"decisions": flat_decisions[0:3]}
|
assert received_a == {"decisions": flat_decisions[0:3]}
|
||||||
assert result_b.update["decision_text"] == repr(
|
assert result_b.update["decision_text"] == repr({"decisions": flat_decisions[3:4]})
|
||||||
{"decisions": flat_decisions[3:4]}
|
|
||||||
)
|
|
||||||
|
|
||||||
assert "surfsense_resume_value" not in parent_config["configurable"]
|
assert "surfsense_resume_value" not in parent_config["configurable"]
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ This module pins:
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
|
@ -227,10 +228,8 @@ async def test_heterogeneous_decisions_route_to_correct_subagents_with_metadata_
|
||||||
for msg in final_state.values.get("messages", []) or []:
|
for msg in final_state.values.get("messages", []) or []:
|
||||||
content = getattr(msg, "content", None)
|
content = getattr(msg, "content", None)
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
with contextlib.suppress(json.JSONDecodeError):
|
||||||
payloads.append(json.loads(content))
|
payloads.append(json.loads(content))
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
expected_a = {"decisions": [a_approve, a_reject]}
|
expected_a = {"decisions": [a_approve, a_reject]}
|
||||||
expected_b = {"decisions": [b_edit, b_approve, b_reject]}
|
expected_b = {"decisions": [b_edit, b_approve, b_reject]}
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ exploratory probe before this test was authored.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
|
@ -243,10 +244,8 @@ async def test_partial_pause_routes_only_to_paused_branch_without_rerunning_comp
|
||||||
for msg in final.values.get("messages", []) or []:
|
for msg in final.values.get("messages", []) or []:
|
||||||
content = getattr(msg, "content", None)
|
content = getattr(msg, "content", None)
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
with contextlib.suppress(json.JSONDecodeError):
|
||||||
payloads.append(json.loads(content))
|
payloads.append(json.loads(content))
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
assert {"decisions": [{"type": "approve"}]} in payloads, (
|
assert {"decisions": [{"type": "approve"}]} in payloads, (
|
||||||
f"REGRESSION: sub-B did not receive its single approve on resume; "
|
f"REGRESSION: sub-B did not receive its single approve on resume; "
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ Why a separate pin:
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
|
@ -191,10 +192,8 @@ async def test_all_reject_decisions_route_to_each_subagent_with_messages_intact(
|
||||||
for msg in final_state.values.get("messages", []) or []:
|
for msg in final_state.values.get("messages", []) or []:
|
||||||
content = getattr(msg, "content", None)
|
content = getattr(msg, "content", None)
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
with contextlib.suppress(json.JSONDecodeError):
|
||||||
payloads.append(json.loads(content))
|
payloads.append(json.loads(content))
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
expected_a = {"decisions": [a_reject_0, a_reject_1]}
|
expected_a = {"decisions": [a_reject_0, a_reject_1]}
|
||||||
expected_b = {"decisions": [b_reject_0]}
|
expected_b = {"decisions": [b_reject_0]}
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,7 @@ silently pass such a bug because the slices would coincide.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
|
@ -81,9 +82,7 @@ def _build_pausing_subagent(checkpointer: InMemorySaver, *, action_count: int):
|
||||||
"review_configs": [{} for _ in range(action_count)],
|
"review_configs": [{} for _ in range(action_count)],
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return {
|
return {"messages": [AIMessage(content=json.dumps(decision, sort_keys=True))]}
|
||||||
"messages": [AIMessage(content=json.dumps(decision, sort_keys=True))]
|
|
||||||
}
|
|
||||||
|
|
||||||
g = StateGraph(_SubState)
|
g = StateGraph(_SubState)
|
||||||
g.add_node("approve", approve_node)
|
g.add_node("approve", approve_node)
|
||||||
|
|
@ -202,7 +201,9 @@ async def test_parallel_resume_with_per_interrupt_id_keying_completes_both_subag
|
||||||
await parent.ainvoke({"messages": [HumanMessage(content="seed")]}, config)
|
await parent.ainvoke({"messages": [HumanMessage(content="seed")]}, config)
|
||||||
|
|
||||||
paused_state = await parent.aget_state(config)
|
paused_state = await parent.aget_state(config)
|
||||||
assert len(paused_state.interrupts) == 2, "fixture broken: expected 2 paused subagents"
|
assert len(paused_state.interrupts) == 2, (
|
||||||
|
"fixture broken: expected 2 paused subagents"
|
||||||
|
)
|
||||||
|
|
||||||
pending = collect_pending_tool_calls(paused_state)
|
pending = collect_pending_tool_calls(paused_state)
|
||||||
assert dict(pending) == {tcid_a: 2, tcid_b: 3}, (
|
assert dict(pending) == {tcid_a: 2, tcid_b: 3}, (
|
||||||
|
|
@ -243,10 +244,8 @@ async def test_parallel_resume_with_per_interrupt_id_keying_completes_both_subag
|
||||||
for msg in final_state.values.get("messages", []) or []:
|
for msg in final_state.values.get("messages", []) or []:
|
||||||
content = getattr(msg, "content", None)
|
content = getattr(msg, "content", None)
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
with contextlib.suppress(json.JSONDecodeError):
|
||||||
payloads.append(json.loads(content))
|
payloads.append(json.loads(content))
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
expected_a = {"decisions": [a_d0, a_d1]}
|
expected_a = {"decisions": [a_d0, a_d1]}
|
||||||
expected_b = {"decisions": [b_d0, b_d1, b_d2]}
|
expected_b = {"decisions": [b_d0, b_d1, b_d2]}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ their per-slice payload.
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
from typing import Annotated
|
from typing import Annotated
|
||||||
|
|
||||||
|
|
@ -151,7 +152,11 @@ def _parent_dispatching_one_of_each(
|
||||||
return [
|
return [
|
||||||
Send(
|
Send(
|
||||||
"call_task",
|
"call_task",
|
||||||
{"tcid": tcid_self, "desc": "approve email", "subtype": "self-gated-agent"},
|
{
|
||||||
|
"tcid": tcid_self,
|
||||||
|
"desc": "approve email",
|
||||||
|
"subtype": "self-gated-agent",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
Send(
|
Send(
|
||||||
"call_task",
|
"call_task",
|
||||||
|
|
@ -250,10 +255,8 @@ async def test_parallel_self_gated_and_middleware_gated_route_and_resume_cleanly
|
||||||
for msg in final.values.get("messages", []) or []:
|
for msg in final.values.get("messages", []) or []:
|
||||||
content = getattr(msg, "content", None)
|
content = getattr(msg, "content", None)
|
||||||
if isinstance(content, str):
|
if isinstance(content, str):
|
||||||
try:
|
with contextlib.suppress(json.JSONDecodeError):
|
||||||
payloads.append(json.loads(content))
|
payloads.append(json.loads(content))
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
self_payloads = [p for p in payloads if p.get("kind") == "self_gated"]
|
self_payloads = [p for p in payloads if p.get("kind") == "self_gated"]
|
||||||
mw_payloads = [p for p in payloads if p.get("kind") == "middleware_gated"]
|
mw_payloads = [p for p in payloads if p.get("kind") == "middleware_gated"]
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,11 @@ async def test_two_parallel_atasks_same_subagent_type_different_tool_call_ids():
|
||||||
shared_subagent = _build_success_subagent("ok")
|
shared_subagent = _build_success_subagent("ok")
|
||||||
task_tool = build_task_tool_with_parent_config(
|
task_tool = build_task_tool_with_parent_config(
|
||||||
[
|
[
|
||||||
{"name": "approver", "description": "shared approver", "runnable": shared_subagent},
|
{
|
||||||
|
"name": "approver",
|
||||||
|
"description": "shared approver",
|
||||||
|
"runnable": shared_subagent,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -163,7 +167,10 @@ async def test_two_parallel_atasks_same_subagent_type_different_tool_call_ids():
|
||||||
state_parent = await shared_subagent.aget_state(
|
state_parent = await shared_subagent.aget_state(
|
||||||
{"configurable": {"thread_id": "shared-subagent-thread"}}
|
{"configurable": {"thread_id": "shared-subagent-thread"}}
|
||||||
)
|
)
|
||||||
assert state_parent.values == {} or state_parent.values.get("messages") in (None, [])
|
assert state_parent.values == {} or state_parent.values.get("messages") in (
|
||||||
|
None,
|
||||||
|
[],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
|
|
@ -181,8 +188,16 @@ async def test_one_atask_failure_does_not_corrupt_sibling_atask():
|
||||||
surviving_subagent = _build_success_subagent("still here")
|
surviving_subagent = _build_success_subagent("still here")
|
||||||
task_tool = build_task_tool_with_parent_config(
|
task_tool = build_task_tool_with_parent_config(
|
||||||
[
|
[
|
||||||
{"name": "broken", "description": "always fails", "runnable": failing_subagent},
|
{
|
||||||
{"name": "healthy", "description": "always succeeds", "runnable": surviving_subagent},
|
"name": "broken",
|
||||||
|
"description": "always fails",
|
||||||
|
"runnable": failing_subagent,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "healthy",
|
||||||
|
"description": "always succeeds",
|
||||||
|
"runnable": surviving_subagent,
|
||||||
|
},
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -84,9 +84,7 @@ async def test_resume_with_approve_envelope_returns_once_decision():
|
||||||
config = {"configurable": {"thread_id": "perm-once"}}
|
config = {"configurable": {"thread_id": "perm-once"}}
|
||||||
await graph.ainvoke({"messages": [HumanMessage(content="seed")]}, config)
|
await graph.ainvoke({"messages": [HumanMessage(content="seed")]}, config)
|
||||||
|
|
||||||
await graph.ainvoke(
|
await graph.ainvoke(Command(resume={"decisions": [{"type": "approve"}]}), config)
|
||||||
Command(resume={"decisions": [{"type": "approve"}]}), config
|
|
||||||
)
|
|
||||||
final = await graph.aget_state(config)
|
final = await graph.aget_state(config)
|
||||||
assert final.values.get("final_decision") == {"decision_type": "once"}
|
assert final.values.get("final_decision") == {"decision_type": "once"}
|
||||||
|
|
||||||
|
|
@ -116,9 +114,7 @@ async def test_resume_with_reject_and_feedback_carries_feedback_through():
|
||||||
|
|
||||||
await graph.ainvoke(
|
await graph.ainvoke(
|
||||||
Command(
|
Command(
|
||||||
resume={
|
resume={"decisions": [{"type": "reject", "feedback": "use the trash bin"}]}
|
||||||
"decisions": [{"type": "reject", "feedback": "use the trash bin"}]
|
|
||||||
}
|
|
||||||
),
|
),
|
||||||
config,
|
config,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -141,9 +141,7 @@ async def test_kb_ruleset_resume_with_approve_lets_rm_through():
|
||||||
config = {"configurable": {"thread_id": "kb-cloud-rm-approve"}}
|
config = {"configurable": {"thread_id": "kb-cloud-rm-approve"}}
|
||||||
await graph.ainvoke({"messages": [HumanMessage(content="seed")]}, config)
|
await graph.ainvoke({"messages": [HumanMessage(content="seed")]}, config)
|
||||||
|
|
||||||
await graph.ainvoke(
|
await graph.ainvoke(Command(resume={"decisions": [{"type": "approve"}]}), config)
|
||||||
Command(resume={"decisions": [{"type": "approve"}]}), config
|
|
||||||
)
|
|
||||||
final = await graph.aget_state(config)
|
final = await graph.aget_state(config)
|
||||||
assert final.next == (), "graph must complete after approve"
|
assert final.next == (), "graph must complete after approve"
|
||||||
last_ai = next(
|
last_ai = next(
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,9 @@ class TestBuildLcHitlPayload:
|
||||||
interrupt_type="x",
|
interrupt_type="x",
|
||||||
)
|
)
|
||||||
decisions.append(LC_DECISION_REJECT)
|
decisions.append(LC_DECISION_REJECT)
|
||||||
assert payload["review_configs"][0]["allowed_decisions"] == [LC_DECISION_APPROVE]
|
assert payload["review_configs"][0]["allowed_decisions"] == [
|
||||||
|
LC_DECISION_APPROVE
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
class TestParseLcEnvelope:
|
class TestParseLcEnvelope:
|
||||||
|
|
|
||||||
|
|
@ -903,9 +903,7 @@ async def test_extract_pdf_without_vision_llm_skips_picture_descriptions(
|
||||||
describe_mock.assert_not_called()
|
describe_mock.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
async def test_extract_pdf_with_vision_llm_swallows_describe_failure(
|
async def test_extract_pdf_with_vision_llm_swallows_describe_failure(tmp_path, mocker):
|
||||||
tmp_path, mocker
|
|
||||||
):
|
|
||||||
"""A pypdf or vision LLM blow-up never fails the document upload."""
|
"""A pypdf or vision LLM blow-up never fails the document upload."""
|
||||||
pdf_file = tmp_path / "report.pdf"
|
pdf_file = tmp_path / "report.pdf"
|
||||||
pdf_file.write_bytes(b"%PDF-1.4 fake content")
|
pdf_file.write_bytes(b"%PDF-1.4 fake content")
|
||||||
|
|
@ -976,9 +974,7 @@ async def test_extract_pdf_with_vision_llm_no_images_returns_parser_text(
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
async def test_extract_pdf_passes_ocr_runner_to_describe_pictures(
|
async def test_extract_pdf_passes_ocr_runner_to_describe_pictures(tmp_path, mocker):
|
||||||
tmp_path, mocker
|
|
||||||
):
|
|
||||||
"""The ETL service must wire an ocr_runner kwarg to describe_pictures."""
|
"""The ETL service must wire an ocr_runner kwarg to describe_pictures."""
|
||||||
pdf_file = tmp_path / "report.pdf"
|
pdf_file = tmp_path / "report.pdf"
|
||||||
pdf_file.write_bytes(b"%PDF-1.4 fake content")
|
pdf_file.write_bytes(b"%PDF-1.4 fake content")
|
||||||
|
|
@ -1027,9 +1023,7 @@ async def test_extract_pdf_ocr_runner_invokes_document_parser_on_image(
|
||||||
mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
|
mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
|
||||||
|
|
||||||
fake_docling = mocker.AsyncMock()
|
fake_docling = mocker.AsyncMock()
|
||||||
fake_docling.process_document.return_value = {
|
fake_docling.process_document.return_value = {"content": "Slice 24 / 60 L R"}
|
||||||
"content": "Slice 24 / 60 L R"
|
|
||||||
}
|
|
||||||
mocker.patch(
|
mocker.patch(
|
||||||
"app.services.docling_service.create_docling_service",
|
"app.services.docling_service.create_docling_service",
|
||||||
return_value=fake_docling,
|
return_value=fake_docling,
|
||||||
|
|
@ -1074,7 +1068,7 @@ async def test_extract_pdf_ocr_runner_returns_empty_on_unsupported_image(
|
||||||
pdf_file = tmp_path / "report.pdf"
|
pdf_file = tmp_path / "report.pdf"
|
||||||
pdf_file.write_bytes(b"%PDF-1.4 fake content")
|
pdf_file.write_bytes(b"%PDF-1.4 fake content")
|
||||||
weird_image = tmp_path / "Im0.jp2" # JPEG2000, unlikely to be supported
|
weird_image = tmp_path / "Im0.jp2" # JPEG2000, unlikely to be supported
|
||||||
weird_image.write_bytes(b"\x00\x00\x00\x0CjP" + b"\x00" * 50)
|
weird_image.write_bytes(b"\x00\x00\x00\x0cjP" + b"\x00" * 50)
|
||||||
|
|
||||||
mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
|
mocker.patch("app.config.config.ETL_SERVICE", "DOCLING")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -330,11 +330,17 @@ def test_inject_handles_multiple_images_in_order():
|
||||||
result = PictureExtractionResult(
|
result = PictureExtractionResult(
|
||||||
descriptions=[
|
descriptions=[
|
||||||
PictureDescription(
|
PictureDescription(
|
||||||
page_number=1, ordinal_in_page=0, name="Im0", sha256="aa",
|
page_number=1,
|
||||||
|
ordinal_in_page=0,
|
||||||
|
name="Im0",
|
||||||
|
sha256="aa",
|
||||||
description="Desc A",
|
description="Desc A",
|
||||||
),
|
),
|
||||||
PictureDescription(
|
PictureDescription(
|
||||||
page_number=2, ordinal_in_page=0, name="Im1", sha256="bb",
|
page_number=2,
|
||||||
|
ordinal_in_page=0,
|
||||||
|
name="Im1",
|
||||||
|
sha256="bb",
|
||||||
description="Desc B",
|
description="Desc B",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
@ -511,9 +517,7 @@ async def test_describe_pictures_calls_ocr_runner_per_image(tmp_path, mocker):
|
||||||
assert by_name == {"Im0.jpeg": "OCR text A", "Im1.png": "OCR text B"}
|
assert by_name == {"Im0.jpeg": "OCR text A", "Im1.png": "OCR text B"}
|
||||||
|
|
||||||
|
|
||||||
async def test_describe_pictures_runs_vision_and_ocr_in_parallel(
|
async def test_describe_pictures_runs_vision_and_ocr_in_parallel(tmp_path, mocker):
|
||||||
tmp_path, mocker
|
|
||||||
):
|
|
||||||
"""Vision LLM and OCR run concurrently per image, not sequentially.
|
"""Vision LLM and OCR run concurrently per image, not sequentially.
|
||||||
|
|
||||||
We verify this by recording call timestamps: if both finish within
|
We verify this by recording call timestamps: if both finish within
|
||||||
|
|
@ -656,9 +660,7 @@ async def test_describe_pictures_vision_failure_with_ocr_runner_skips_image(
|
||||||
assert result.failed == 1
|
assert result.failed == 1
|
||||||
|
|
||||||
|
|
||||||
async def test_describe_pictures_no_ocr_runner_keeps_ocr_text_none(
|
async def test_describe_pictures_no_ocr_runner_keeps_ocr_text_none(tmp_path, mocker):
|
||||||
tmp_path, mocker
|
|
||||||
):
|
|
||||||
"""Backward compat: omitting ocr_runner produces description-only blocks."""
|
"""Backward compat: omitting ocr_runner produces description-only blocks."""
|
||||||
pdf_file = tmp_path / "report.pdf"
|
pdf_file = tmp_path / "report.pdf"
|
||||||
pdf_file.write_bytes(b"%PDF-1.4 fake")
|
pdf_file.write_bytes(b"%PDF-1.4 fake")
|
||||||
|
|
@ -824,11 +826,17 @@ def test_inject_handles_multiple_figures_in_document_order():
|
||||||
result = PictureExtractionResult(
|
result = PictureExtractionResult(
|
||||||
descriptions=[
|
descriptions=[
|
||||||
PictureDescription(
|
PictureDescription(
|
||||||
page_number=1, ordinal_in_page=0, name="Im0", sha256="aa",
|
page_number=1,
|
||||||
|
ordinal_in_page=0,
|
||||||
|
name="Im0",
|
||||||
|
sha256="aa",
|
||||||
description="Description of chart A.",
|
description="Description of chart A.",
|
||||||
),
|
),
|
||||||
PictureDescription(
|
PictureDescription(
|
||||||
page_number=2, ordinal_in_page=0, name="Im1", sha256="bb",
|
page_number=2,
|
||||||
|
ordinal_in_page=0,
|
||||||
|
name="Im1",
|
||||||
|
sha256="bb",
|
||||||
description="Description of chart B.",
|
description="Description of chart B.",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
@ -842,9 +850,7 @@ def test_inject_handles_multiple_figures_in_document_order():
|
||||||
assert out.count("</figure>") == 2
|
assert out.count("</figure>") == 2
|
||||||
assert "Description of chart A." in out
|
assert "Description of chart A." in out
|
||||||
assert "Description of chart B." in out
|
assert "Description of chart B." in out
|
||||||
assert out.index("Description of chart A.") < out.index(
|
assert out.index("Description of chart A.") < out.index("Description of chart B.")
|
||||||
"Description of chart B."
|
|
||||||
)
|
|
||||||
# Each description appears AFTER its corresponding </figure>.
|
# Each description appears AFTER its corresponding </figure>.
|
||||||
first_close = out.index("</figure>")
|
first_close = out.index("</figure>")
|
||||||
assert first_close < out.index("Description of chart A.")
|
assert first_close < out.index("Description of chart A.")
|
||||||
|
|
@ -856,7 +862,7 @@ def test_inject_figures_with_attributes_and_nested_tags():
|
||||||
"""``<figure>`` with attributes and nested tags is matched and preserved."""
|
"""``<figure>`` with attributes and nested tags is matched and preserved."""
|
||||||
markdown = (
|
markdown = (
|
||||||
'<figure id="fig-3" class="chart">\n'
|
'<figure id="fig-3" class="chart">\n'
|
||||||
'<figcaption>Source: Pew Research</figcaption>\n'
|
"<figcaption>Source: Pew Research</figcaption>\n"
|
||||||
"<table><tr><td>Republican</td><td>57</td></tr></table>\n"
|
"<table><tr><td>Republican</td><td>57</td></tr></table>\n"
|
||||||
"</figure>\n"
|
"</figure>\n"
|
||||||
)
|
)
|
||||||
|
|
@ -899,8 +905,7 @@ def test_inject_figures_more_descriptions_than_figures_returns_remaining():
|
||||||
def test_inject_figures_more_figures_than_descriptions_leaves_extras_untouched():
|
def test_inject_figures_more_figures_than_descriptions_leaves_extras_untouched():
|
||||||
"""Two figures, one description -> first figure enriched, second left raw."""
|
"""Two figures, one description -> first figure enriched, second left raw."""
|
||||||
markdown = (
|
markdown = (
|
||||||
"<figure>\nfigure 1 content\n</figure>\n"
|
"<figure>\nfigure 1 content\n</figure>\n<figure>\nfigure 2 content\n</figure>\n"
|
||||||
"<figure>\nfigure 2 content\n</figure>\n"
|
|
||||||
)
|
)
|
||||||
result = PictureExtractionResult(
|
result = PictureExtractionResult(
|
||||||
descriptions=[_desc(name="Im0", description="Only description.")]
|
descriptions=[_desc(name="Im0", description="Only description.")]
|
||||||
|
|
|
||||||
|
|
@ -151,8 +151,7 @@ async def test_preserves_state_interrupts_traversal_order():
|
||||||
state_order = [
|
state_order = [
|
||||||
i.value["tool_call_id"]
|
i.value["tool_call_id"]
|
||||||
for i in state.interrupts
|
for i in state.interrupts
|
||||||
if isinstance(getattr(i, "value", None), dict)
|
if isinstance(getattr(i, "value", None), dict) and "tool_call_id" in i.value
|
||||||
and "tool_call_id" in i.value
|
|
||||||
]
|
]
|
||||||
|
|
||||||
assert inspector_order == state_order, (
|
assert inspector_order == state_order, (
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,9 @@ def _tool_start(*, name: str, run_id: str) -> dict[str, Any]:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def _drain_step_ids(events: list[dict[str, Any]], *, step_prefix: str) -> set[str]:
|
async def _drain_step_ids(
|
||||||
|
events: list[dict[str, Any]], *, step_prefix: str
|
||||||
|
) -> set[str]:
|
||||||
"""Run ``_stream_agent_events`` once and return every emitted thinking-step ID."""
|
"""Run ``_stream_agent_events`` once and return every emitted thinking-step ID."""
|
||||||
agent = _FakeAgent(events)
|
agent = _FakeAgent(events)
|
||||||
service = VercelStreamingService()
|
service = VercelStreamingService()
|
||||||
|
|
|
||||||
|
|
@ -68,9 +68,7 @@ export function BlogWithSearchMagazine({ blogs }: { blogs: BlogEntry[] }) {
|
||||||
|
|
||||||
<MagazineFeatured blog={coverStory} />
|
<MagazineFeatured blog={coverStory} />
|
||||||
|
|
||||||
{secondaryFeatured.length > 0 ? (
|
{secondaryFeatured.length > 0 ? <MoreFeatured blogs={secondaryFeatured} /> : null}
|
||||||
<MoreFeatured blogs={secondaryFeatured} />
|
|
||||||
) : null}
|
|
||||||
|
|
||||||
<MagazineSearchGrid blogs={blogs} excludedSlugs={heroSlugs} />
|
<MagazineSearchGrid blogs={blogs} excludedSlugs={heroSlugs} />
|
||||||
</Container>
|
</Container>
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
||||||
import { formatThreadTimestamp } from "@/lib/format-date";
|
|
||||||
import { useSetAtom } from "jotai";
|
import { useSetAtom } from "jotai";
|
||||||
import {
|
import {
|
||||||
ArchiveIcon,
|
ArchiveIcon,
|
||||||
|
|
@ -49,6 +48,7 @@ import {
|
||||||
searchThreads,
|
searchThreads,
|
||||||
updateThread,
|
updateThread,
|
||||||
} from "@/lib/chat/thread-persistence";
|
} from "@/lib/chat/thread-persistence";
|
||||||
|
import { formatThreadTimestamp } from "@/lib/format-date";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
import { SidebarSlideOutPanel } from "./SidebarSlideOutPanel";
|
import { SidebarSlideOutPanel } from "./SidebarSlideOutPanel";
|
||||||
|
|
||||||
|
|
@ -389,8 +389,7 @@ export function AllPrivateChatsSidebarContent({
|
||||||
</TooltipTrigger>
|
</TooltipTrigger>
|
||||||
<TooltipContent side="bottom" align="start">
|
<TooltipContent side="bottom" align="start">
|
||||||
<p>
|
<p>
|
||||||
{t("updated") || "Updated"}:{" "}
|
{t("updated") || "Updated"}: {formatThreadTimestamp(thread.updatedAt)}
|
||||||
{formatThreadTimestamp(thread.updatedAt)}
|
|
||||||
</p>
|
</p>
|
||||||
</TooltipContent>
|
</TooltipContent>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
"use client";
|
"use client";
|
||||||
|
|
||||||
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
import { useQuery, useQueryClient } from "@tanstack/react-query";
|
||||||
import { formatThreadTimestamp } from "@/lib/format-date";
|
|
||||||
import { useSetAtom } from "jotai";
|
import { useSetAtom } from "jotai";
|
||||||
import {
|
import {
|
||||||
ArchiveIcon,
|
ArchiveIcon,
|
||||||
|
|
@ -49,6 +48,7 @@ import {
|
||||||
searchThreads,
|
searchThreads,
|
||||||
updateThread,
|
updateThread,
|
||||||
} from "@/lib/chat/thread-persistence";
|
} from "@/lib/chat/thread-persistence";
|
||||||
|
import { formatThreadTimestamp } from "@/lib/format-date";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
import { SidebarSlideOutPanel } from "./SidebarSlideOutPanel";
|
import { SidebarSlideOutPanel } from "./SidebarSlideOutPanel";
|
||||||
|
|
||||||
|
|
@ -388,8 +388,7 @@ export function AllSharedChatsSidebarContent({
|
||||||
</TooltipTrigger>
|
</TooltipTrigger>
|
||||||
<TooltipContent side="bottom" align="start">
|
<TooltipContent side="bottom" align="start">
|
||||||
<p>
|
<p>
|
||||||
{t("updated") || "Updated"}:{" "}
|
{t("updated") || "Updated"}: {formatThreadTimestamp(thread.updatedAt)}
|
||||||
{formatThreadTimestamp(thread.updatedAt)}
|
|
||||||
</p>
|
</p>
|
||||||
</TooltipContent>
|
</TooltipContent>
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue