diff --git a/tests/e2e/configs/retry_it10_timeout_triggers_retry.yaml b/tests/e2e/configs/retry_it10_timeout_triggers_retry.yaml
new file mode 100644
index 00000000..22a340d1
--- /dev/null
+++ b/tests/e2e/configs/retry_it10_timeout_triggers_retry.yaml
@@ -0,0 +1,27 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      fallback_models: [anthropic/claude-3-5-sonnet]
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+      on_timeout:
+        strategy: "different_provider"
+        max_attempts: 2
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it11_high_latency_failover.yaml b/tests/e2e/configs/retry_it11_high_latency_failover.yaml
new file mode 100644
index 00000000..1dc8a7e2
--- /dev/null
+++ b/tests/e2e/configs/retry_it11_high_latency_failover.yaml
@@ -0,0 +1,33 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      fallback_models: [anthropic/claude-3-5-sonnet]
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+      on_high_latency:
+        threshold_ms: 1000
+        measure: "total"
+        min_triggers: 1
+        strategy: "different_provider"
+        max_attempts: 2
+        block_duration_seconds: 60
+        scope: "model"
+        apply_to: "global"
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it12_streaming.yaml b/tests/e2e/configs/retry_it12_streaming.yaml
new file mode 100644
index 00000000..f1933fa0
--- /dev/null
+++ b/tests/e2e/configs/retry_it12_streaming.yaml
@@ -0,0 +1,23 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it13_body_preserved.yaml b/tests/e2e/configs/retry_it13_body_preserved.yaml
new file mode 100644
index 00000000..f1933fa0
--- /dev/null
+++ b/tests/e2e/configs/retry_it13_body_preserved.yaml
@@ -0,0 +1,23 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it1_basic_429.yaml b/tests/e2e/configs/retry_it1_basic_429.yaml
new file mode 100644
index 00000000..f1933fa0
--- /dev/null
+++ b/tests/e2e/configs/retry_it1_basic_429.yaml
@@ -0,0 +1,23 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it2_503_different_provider.yaml b/tests/e2e/configs/retry_it2_503_different_provider.yaml
new file mode 100644
index 00000000..38fe2edb
--- /dev/null
+++ b/tests/e2e/configs/retry_it2_503_different_provider.yaml
@@ -0,0 +1,23 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [503]
+          strategy: "different_provider"
+          max_attempts: 2
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it3_all_exhausted.yaml b/tests/e2e/configs/retry_it3_all_exhausted.yaml
new file mode 100644
index 00000000..f1933fa0
--- /dev/null
+++ b/tests/e2e/configs/retry_it3_all_exhausted.yaml
@@ -0,0 +1,23 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it4_no_retry_policy.yaml b/tests/e2e/configs/retry_it4_no_retry_policy.yaml
new file mode 100644
index 00000000..26bf31a6
--- /dev/null
+++ b/tests/e2e/configs/retry_it4_no_retry_policy.yaml
@@ -0,0 +1,17 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    # No retry_policy — errors should be returned directly to client
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
diff --git a/tests/e2e/configs/retry_it5_max_attempts.yaml b/tests/e2e/configs/retry_it5_max_attempts.yaml
new file mode 100644
index 00000000..f1cfa815
--- /dev/null
+++ b/tests/e2e/configs/retry_it5_max_attempts.yaml
@@ -0,0 +1,27 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 1
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 1
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
+
+  - model: mistral/mistral-large
+    base_url: http://host.docker.internal:${MOCK_TERTIARY_PORT}
+    access_key: test-key-tertiary
diff --git a/tests/e2e/configs/retry_it6_backoff_delay.yaml b/tests/e2e/configs/retry_it6_backoff_delay.yaml
new file mode 100644
index 00000000..e7ec474c
--- /dev/null
+++ b/tests/e2e/configs/retry_it6_backoff_delay.yaml
@@ -0,0 +1,24 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "same_model"
+      default_max_attempts: 3
+      on_status_codes:
+        - codes: [429]
+          strategy: "same_model"
+          max_attempts: 3
+      backoff:
+        apply_to: "same_model"
+        base_ms: 500
+        max_ms: 5000
+        jitter: false
diff --git a/tests/e2e/configs/retry_it7_fallback_priority.yaml b/tests/e2e/configs/retry_it7_fallback_priority.yaml
new file mode 100644
index 00000000..e5bee0c5
--- /dev/null
+++ b/tests/e2e/configs/retry_it7_fallback_priority.yaml
@@ -0,0 +1,28 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      fallback_models: [anthropic/claude-3-5-sonnet, mistral/mistral-large]
+      default_strategy: "different_provider"
+      default_max_attempts: 3
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 3
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_FALLBACK1_PORT}
+    access_key: test-key-fallback1
+
+  - model: mistral/mistral-large
+    base_url: http://host.docker.internal:${MOCK_FALLBACK2_PORT}
+    access_key: test-key-fallback2
diff --git a/tests/e2e/configs/retry_it8_retry_after_honored.yaml b/tests/e2e/configs/retry_it8_retry_after_honored.yaml
new file mode 100644
index 00000000..3088759d
--- /dev/null
+++ b/tests/e2e/configs/retry_it8_retry_after_honored.yaml
@@ -0,0 +1,23 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      default_strategy: "same_model"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "same_model"
+          max_attempts: 2
+      retry_after_handling:
+        scope: "model"
+        apply_to: "request"
+        max_retry_after_seconds: 300
diff --git a/tests/e2e/configs/retry_it9_retry_after_blocks_selection.yaml b/tests/e2e/configs/retry_it9_retry_after_blocks_selection.yaml
new file mode 100644
index 00000000..ef3d7ad7
--- /dev/null
+++ b/tests/e2e/configs/retry_it9_retry_after_blocks_selection.yaml
@@ -0,0 +1,36 @@
+version: v0.3.0
+
+listeners:
+  - type: model
+    name: model_listener
+    port: 12000
+
+model_providers:
+  - model: openai/gpt-4o
+    base_url: http://host.docker.internal:${MOCK_PRIMARY_PORT}
+    access_key: test-key-primary
+    default: true
+    retry_policy:
+      fallback_models: [anthropic/claude-3-5-sonnet]
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2
+      retry_after_handling:
+        scope: "model"
+        apply_to: "global"
+        max_retry_after_seconds: 300
+
+  - model: anthropic/claude-3-5-sonnet
+    base_url: http://host.docker.internal:${MOCK_SECONDARY_PORT}
+    access_key: test-key-secondary
+    default: false
+    retry_policy:
+      default_strategy: "different_provider"
+      default_max_attempts: 2
+      on_status_codes:
+        - codes: [429]
+          strategy: "different_provider"
+          max_attempts: 2