mirror of
https://github.com/katanemo/plano.git
synced 2026-05-21 13:55:15 +02:00
Fix request closures during long-running streaming (#899)
This commit is contained in:
parent
78d8c90184
commit
c3c213b2fd
1 changed file with 7 additions and 2 deletions
|
|
@ -91,7 +91,12 @@ def convert_legacy_listeners(
|
||||||
"type": "model",
|
"type": "model",
|
||||||
"port": 12000,
|
"port": 12000,
|
||||||
"address": "0.0.0.0",
|
"address": "0.0.0.0",
|
||||||
"timeout": "30s",
|
# LLM streaming responses routinely exceed 30s (extended thinking,
|
||||||
|
# long tool reasoning, large completions). Match the 300s ceiling
|
||||||
|
# used by the direct upstream-provider routes so Envoy doesn't
|
||||||
|
# abort streams with UT (upstream request timeout) mid-response. Users can override via their
|
||||||
|
# plano_config.yaml `listeners.timeout` field.
|
||||||
|
"timeout": "300s",
|
||||||
"model_providers": model_providers or [],
|
"model_providers": model_providers or [],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -100,7 +105,7 @@ def convert_legacy_listeners(
|
||||||
"type": "prompt",
|
"type": "prompt",
|
||||||
"port": 10000,
|
"port": 10000,
|
||||||
"address": "0.0.0.0",
|
"address": "0.0.0.0",
|
||||||
"timeout": "30s",
|
"timeout": "300s",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Handle None case
|
# Handle None case
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue