diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3c5f9372..0882479d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -133,13 +133,13 @@ jobs:
load: true
tags: |
${{ env.PLANO_DOCKER_IMAGE }}
- ${{ env.DOCKER_IMAGE }}:0.4.20
+ ${{ env.DOCKER_IMAGE }}:0.4.21
${{ env.DOCKER_IMAGE }}:latest
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Save image as artifact
- run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.20 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
+ run: docker save ${{ env.PLANO_DOCKER_IMAGE }} ${{ env.DOCKER_IMAGE }}:0.4.21 ${{ env.DOCKER_IMAGE }}:latest -o /tmp/plano-image.tar
- name: Upload image artifact
uses: actions/upload-artifact@v6
diff --git a/apps/www/src/components/Hero.tsx b/apps/www/src/components/Hero.tsx
index bf243ce9..aa9a2298 100644
--- a/apps/www/src/components/Hero.tsx
+++ b/apps/www/src/components/Hero.tsx
@@ -24,7 +24,7 @@ export function Hero() {
>
- v0.4.20
+ v0.4.21
—
diff --git a/build_filter_image.sh b/build_filter_image.sh
index 2fbee244..a0dd2498 100644
--- a/build_filter_image.sh
+++ b/build_filter_image.sh
@@ -1 +1 @@
-docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.20
+docker build -f Dockerfile . -t katanemo/plano -t katanemo/plano:0.4.21
diff --git a/cli/planoai/__init__.py b/cli/planoai/__init__.py
index 3ae1c625..ac0015d7 100644
--- a/cli/planoai/__init__.py
+++ b/cli/planoai/__init__.py
@@ -1,3 +1,3 @@
"""Plano CLI - Intelligent Prompt Gateway."""
-__version__ = "0.4.20"
+__version__ = "0.4.21"
diff --git a/cli/planoai/consts.py b/cli/planoai/consts.py
index 05c213e3..fc7b6f1a 100644
--- a/cli/planoai/consts.py
+++ b/cli/planoai/consts.py
@@ -5,7 +5,7 @@ PLANO_COLOR = "#969FF4"
SERVICE_NAME_ARCHGW = "plano"
PLANO_DOCKER_NAME = "plano"
-PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.20")
+PLANO_DOCKER_IMAGE = os.getenv("PLANO_DOCKER_IMAGE", "katanemo/plano:0.4.21")
DEFAULT_OTEL_TRACING_GRPC_ENDPOINT = "http://localhost:4317"
# Native mode constants
diff --git a/cli/pyproject.toml b/cli/pyproject.toml
index da297d70..0be85ed5 100644
--- a/cli/pyproject.toml
+++ b/cli/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "planoai"
-version = "0.4.20"
+version = "0.4.21"
description = "Python-based CLI tool to manage Plano."
authors = [{name = "Katanemo Labs, Inc."}]
readme = "README.md"
diff --git a/cli/uv.lock b/cli/uv.lock
index 75105275..8910b0a4 100644
--- a/cli/uv.lock
+++ b/cli/uv.lock
@@ -337,7 +337,7 @@ wheels = [
[[package]]
name = "planoai"
-version = "0.4.20"
+version = "0.4.21"
source = { editable = "." }
dependencies = [
{ name = "click" },
diff --git a/demos/llm_routing/preference_based_routing/README.md b/demos/llm_routing/preference_based_routing/README.md
index 89ea00bb..3208c17c 100644
--- a/demos/llm_routing/preference_based_routing/README.md
+++ b/demos/llm_routing/preference_based_routing/README.md
@@ -3,7 +3,7 @@ This demo shows how you can use user preferences to route user prompts to approp
## How to start the demo
-Make sure you have Plano CLI installed (`pip install planoai==0.4.20` or `uv tool install planoai==0.4.20`).
+Make sure you have Plano CLI installed (`pip install planoai==0.4.21` or `uv tool install planoai==0.4.21`).
```bash
cd demos/llm_routing/preference_based_routing
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 26d8c280..401b80f1 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -17,7 +17,7 @@ from sphinxawesome_theme.postprocess import Icons
project = "Plano Docs"
copyright = "2026, Katanemo Labs, a DigitalOcean Company"
author = "Katanemo Labs, Inc"
-release = " v0.4.20"
+release = " v0.4.21"
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
diff --git a/docs/source/get_started/quickstart.rst b/docs/source/get_started/quickstart.rst
index 92abef23..509fe3c9 100644
--- a/docs/source/get_started/quickstart.rst
+++ b/docs/source/get_started/quickstart.rst
@@ -43,7 +43,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
.. code-block:: console
- $ uv tool install planoai==0.4.20
+ $ uv tool install planoai==0.4.21
**Option 2: Install with pip (Traditional)**
@@ -51,7 +51,7 @@ Plano's CLI allows you to manage and interact with the Plano efficiently. To ins
$ python -m venv venv
$ source venv/bin/activate # On Windows, use: venv\Scripts\activate
- $ pip install planoai==0.4.20
+ $ pip install planoai==0.4.21
.. _llm_routing_quickstart:
diff --git a/docs/source/resources/deployment.rst b/docs/source/resources/deployment.rst
index fd2a3c7e..18cb93ac 100644
--- a/docs/source/resources/deployment.rst
+++ b/docs/source/resources/deployment.rst
@@ -65,7 +65,7 @@ Create a ``docker-compose.yml`` file with the following configuration:
# docker-compose.yml
services:
plano:
- image: katanemo/plano:0.4.20
+ image: katanemo/plano:0.4.21
container_name: plano
ports:
- "10000:10000" # ingress (client -> plano)
@@ -153,7 +153,7 @@ Create a ``plano-deployment.yaml``:
spec:
containers:
- name: plano
- image: katanemo/plano:0.4.20
+ image: katanemo/plano:0.4.21
ports:
- containerPort: 12000 # LLM gateway (chat completions, model routing)
name: llm-gateway