From 4d9ed74b68449a3888a48d7b63d5bfc76e266514 Mon Sep 17 00:00:00 2001
From: Adil Hafeez
Date: Mon, 9 Feb 2026 13:20:06 -0800
Subject: [PATCH] improve e2e tests (#731)

* fix build break

docs build was breaking because requirements file was getting ignored from .dockerignore

* improve e2e tests time

* fix: bump GH Actions to latest versions (checkout@v4, setup-python@v5, build-push-action@v6)

* more improvements

* fix perm

* more improvements

* parallel runs
---
Reviewer note (this region is ignored by `git am`):
  Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. Hunk line counts match the @@ headers and the diffstat, but
  exact indentation widths and comment spacing on context/removed lines are
  assumed (2-space YAML, 4-space TOML) -- TODO confirm against the target tree
  before applying; `git apply --ignore-whitespace` may be needed.

 .github/workflows/e2e_tests.yml        | 203 +++++++++++++++++++++----
 tests/e2e/pyproject.toml               |   1 +
 tests/e2e/run_model_alias_tests.sh     |  49 ++++++
 tests/e2e/run_prompt_gateway_tests.sh  |  57 +++++++
 tests/e2e/run_responses_state_tests.sh |  48 ++++++
 5 files changed, 327 insertions(+), 31 deletions(-)
 create mode 100755 tests/e2e/run_model_alias_tests.sh
 create mode 100755 tests/e2e/run_prompt_gateway_tests.sh
 create mode 100755 tests/e2e/run_responses_state_tests.sh

diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index 8696d59b..22dcc910 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -3,52 +3,94 @@ name: e2e tests
 on:
   push:
     branches:
-      - main  # Run tests on pushes to the main branch
+      - main
   pull_request:
 
-jobs:
-  e2e_tests:
-    runs-on: ubuntu-latest
+permissions:
+  contents: read
 
+# Shared env vars for all jobs that run tests
+env:
+  PLANO_DOCKER_IMAGE: katanemo/plano:e2e
+
+jobs:
+  # ──────────────────────────────────────────────
+  # Job 1: Build the Docker image once, with cache
+  # ──────────────────────────────────────────────
+  build:
+    runs-on: ubuntu-latest
     steps:
       - name: Checkout code
-        uses: actions/checkout@v3
-
-      # --- Disk inspection & cleanup section (added to free space on GitHub runner) ---
-      - name: Check disk usage before cleanup
-        run: |
-          echo "=== Disk usage before cleanup ==="
-          df -h
-          echo "=== Repo size ==="
-          du -sh .
+        uses: actions/checkout@v4
 
       - name: Free disk space on runner
         run: |
-          echo "=== Cleaning preinstalled SDKs and toolchains to free space ==="
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          # If you still hit disk issues, uncomment this to free more space.
-          # It just removes cached tool versions; setup-python will re-download what it needs.
-          # sudo rm -rf /opt/hostedtoolcache || true
-
-          echo "=== Docker cleanup (before our builds/compose) ==="
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
           docker system prune -af || true
           docker volume prune -f || true
 
-          echo "=== Disk usage after cleanup ==="
-          df -h
-      # --- End disk cleanup section ---
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build plano image (with GHA cache)
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          file: Dockerfile
+          load: true
+          tags: ${{ env.PLANO_DOCKER_IMAGE }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+
+      - name: Save image as artifact
+        run: docker save ${{ env.PLANO_DOCKER_IMAGE }} -o /tmp/plano-image.tar
+
+      - name: Upload image artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: plano-image
+          path: /tmp/plano-image.tar
+          retention-days: 1
+
+  # ──────────────────────────────────────────────
+  # Job 2a: prompt_gateway tests
+  # ──────────────────────────────────────────────
+  test-prompt-gateway:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Free disk space on runner
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
+          docker system prune -af || true
+          docker volume prune -f || true
+
+      - name: Download plano image
+        uses: actions/download-artifact@v4
+        with:
+          name: plano-image
+          path: /tmp
+
+      - name: Load plano image
+        run: docker load -i /tmp/plano-image.tar
 
       - name: Set up Python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: "3.12"
 
       - name: Install uv
-        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            tests/e2e/uv.lock
+            cli/uv.lock
 
-      - name: Run e2e tests
+      - name: Run prompt_gateway tests
         env:
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
           MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
@@ -56,7 +98,106 @@ jobs:
           ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
           AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
           AWS_BEARER_TOKEN_BEDROCK: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
-          GROK_API_KEY : ${{ secrets.GROK_API_KEY }}
+          GROK_API_KEY: ${{ secrets.GROK_API_KEY }}
         run: |
-          python -mvenv venv
-          source venv/bin/activate && cd tests/e2e && bash run_e2e_tests.sh
+          cd tests/e2e && bash run_prompt_gateway_tests.sh
+
+  # ──────────────────────────────────────────────
+  # Job 2b: model_alias_routing + responses API tests
+  # ──────────────────────────────────────────────
+  test-model-alias-routing:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Free disk space on runner
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
+          docker system prune -af || true
+          docker volume prune -f || true
+
+      - name: Download plano image
+        uses: actions/download-artifact@v4
+        with:
+          name: plano-image
+          path: /tmp
+
+      - name: Load plano image
+        run: docker load -i /tmp/plano-image.tar
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            tests/e2e/uv.lock
+            cli/uv.lock
+
+      - name: Run model alias routing tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+          AWS_BEARER_TOKEN_BEDROCK: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
+          GROK_API_KEY: ${{ secrets.GROK_API_KEY }}
+        run: |
+          cd tests/e2e && bash run_model_alias_tests.sh
+
+  # ──────────────────────────────────────────────
+  # Job 2c: responses API with state storage tests
+  # ──────────────────────────────────────────────
+  test-responses-api-with-state:
+    needs: build
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Free disk space on runner
+        run: |
+          sudo rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc
+          docker system prune -af || true
+          docker volume prune -f || true
+
+      - name: Download plano image
+        uses: actions/download-artifact@v4
+        with:
+          name: plano-image
+          path: /tmp
+
+      - name: Load plano image
+        run: docker load -i /tmp/plano-image.tar
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+          cache-dependency-glob: |
+            tests/e2e/uv.lock
+            cli/uv.lock
+
+      - name: Run responses API with state tests
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
+          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
+          AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }}
+          AWS_BEARER_TOKEN_BEDROCK: ${{ secrets.AWS_BEARER_TOKEN_BEDROCK }}
+          GROK_API_KEY: ${{ secrets.GROK_API_KEY }}
+        run: |
+          cd tests/e2e && bash run_responses_state_tests.sh
diff --git a/tests/e2e/pyproject.toml b/tests/e2e/pyproject.toml
index ca4f5f61..131e765e 100644
--- a/tests/e2e/pyproject.toml
+++ b/tests/e2e/pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
     "pytest-sugar>=1.0.0",
     "deepdiff>=8.0.1",
     "pytest-retry>=1.6.3",
+    "pytest-xdist>=3.5.0",
     "anthropic>=0.66.0",
     "openai>=1.0.0",
 ]
diff --git a/tests/e2e/run_model_alias_tests.sh b/tests/e2e/run_model_alias_tests.sh
new file mode 100755
index 00000000..80ecf0c5
--- /dev/null
+++ b/tests/e2e/run_model_alias_tests.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# Runs the model_alias_routing + openai responses API e2e test suites.
+# These share the same gateway config so they run together.
+# Requires the plano Docker image to already be built/loaded.
+set -e
+
+. ./common_scripts.sh
+
+print_disk_usage
+
+mkdir -p ~/plano_logs
+touch ~/plano_logs/modelserver.log
+
+print_debug() {
+  log "Received signal to stop"
+  log "Printing debug logs for docker"
+  log "===================================="
+  tail -n 100 ../build.log 2>/dev/null || true
+  planoai logs --debug 2>/dev/null | tail -n 100 || true
+}
+
+trap 'print_debug' INT TERM ERR
+
+log starting > ../build.log
+
+# Install plano CLI
+log "building and installing plano cli"
+cd ../../cli
+uv sync
+uv tool install .
+cd -
+
+# Re-sync e2e deps
+uv sync
+
+# Start gateway with model alias routing config
+log "startup arch gateway with model alias routing demo"
+cd ../../
+planoai down || true
+planoai up demos/use_cases/model_alias_routing/config_with_aliases.yaml
+cd -
+
+# Run both test suites that share this config in a single pytest invocation
+log "running e2e tests for model alias routing + openai responses api"
+uv run pytest -n auto test_model_alias_routing.py test_openai_responses_api_client.py
+
+# Cleanup
+log "shutting down"
+planoai down || true
diff --git a/tests/e2e/run_prompt_gateway_tests.sh b/tests/e2e/run_prompt_gateway_tests.sh
new file mode 100755
index 00000000..08a499f3
--- /dev/null
+++ b/tests/e2e/run_prompt_gateway_tests.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# Runs the prompt_gateway e2e test suite.
+# Requires the plano Docker image to already be built/loaded.
+set -e
+
+. ./common_scripts.sh
+
+print_disk_usage
+
+mkdir -p ~/plano_logs
+touch ~/plano_logs/modelserver.log
+
+print_debug() {
+  log "Received signal to stop"
+  log "Printing debug logs for docker"
+  log "===================================="
+  tail -n 100 ../build.log 2>/dev/null || true
+  planoai logs --debug 2>/dev/null | tail -n 100 || true
+}
+
+trap 'print_debug' INT TERM ERR
+
+log starting > ../build.log
+
+# Install plano CLI
+log "building and installing plano cli"
+cd ../../cli
+uv sync
+uv tool install .
+cd -
+
+# Re-sync e2e deps
+uv sync
+
+# Start weather_forecast service (needed for prompt_gateway tests)
+log "building and running weather_forecast service"
+cd ../../demos/samples_python/weather_forecast/
+docker compose up weather_forecast_service --build -d
+cd -
+
+# Start gateway with prompt_gateway config
+log "startup arch gateway with function calling demo"
+cd ../../
+planoai down || true
+planoai up demos/samples_python/weather_forecast/config.yaml
+cd -
+
+# Run tests
+log "running e2e tests for prompt gateway"
+uv run pytest test_prompt_gateway.py
+
+# Cleanup
+log "shutting down"
+planoai down || true
+cd ../../demos/samples_python/weather_forecast
+docker compose down
+cd -
diff --git a/tests/e2e/run_responses_state_tests.sh b/tests/e2e/run_responses_state_tests.sh
new file mode 100755
index 00000000..ad33a058
--- /dev/null
+++ b/tests/e2e/run_responses_state_tests.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Runs the openai responses API with state storage e2e test suite.
+# Requires the plano Docker image to already be built/loaded.
+set -e
+
+. ./common_scripts.sh
+
+print_disk_usage
+
+mkdir -p ~/plano_logs
+touch ~/plano_logs/modelserver.log
+
+print_debug() {
+  log "Received signal to stop"
+  log "Printing debug logs for docker"
+  log "===================================="
+  tail -n 100 ../build.log 2>/dev/null || true
+  planoai logs --debug 2>/dev/null | tail -n 100 || true
+}
+
+trap 'print_debug' INT TERM ERR
+
+log starting > ../build.log
+
+# Install plano CLI
+log "building and installing plano cli"
+cd ../../cli
+uv sync
+uv tool install .
+cd -
+
+# Re-sync e2e deps
+uv sync
+
+# Start gateway with state storage config
+log "startup arch gateway with state storage config"
+cd ../../
+planoai down || true
+planoai up tests/e2e/config_memory_state_v1_responses.yaml
+cd -
+
+# Run tests
+log "running e2e tests for openai responses api with state"
+uv run pytest test_openai_responses_api_client_with_state.py
+
+# Cleanup
+log "shutting down"
+planoai down || true