mirror of
https://github.com/katanemo/plano.git
synced 2026-04-30 19:36:34 +02:00
Adding support for wildcard models in the model_providers config (#696)
* cleaning up plano cli commands * adding support for wildcard model providers * fixing compile errors * fixing bugs related to default model provider, provider hint and duplicates in the model provider list * fixed cargo fmt issues * updating tests to always include the model id * using default for the prompt_gateway path * fixed the model name, as gpt-5-mini-2025-08-07 wasn't in the config * making sure that all aliases and models match the config * fixed the config generator to allow for base_url providers LLMs to include wildcard models * re-ran the models list utility and added a shell script to run it * updating docs to mention wildcard model providers * updated provider_models.json to yaml, added that file to our docs for reference * updating the build docs to use the new root-based build --------- Co-authored-by: Salman Paracha <salmanparacha@MacBook-Pro-342.local>
This commit is contained in:
parent
8428b06e22
commit
2941392ed1
42 changed files with 1748 additions and 202 deletions
44
docs/source/_ext/provider_models.py
Normal file
44
docs/source/_ext/provider_models.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
"""Sphinx extension to copy provider_models.yaml to build output."""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
import shutil
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from sphinx.application import Sphinx
|
||||
|
||||
|
||||
def _on_build_finished(app: Sphinx, exception: Exception | None) -> None:
    """Copy provider_models.yaml to the build output after build completes.

    Args:
        app: The Sphinx application; ``srcdir``, ``outdir`` and ``builder``
            are read from it.
        exception: The exception passed with the ``build-finished`` event, or
            ``None``. Nothing is copied when it is not ``None``.

    The copy is also skipped when the builder's ``format`` is not ``"html"``
    or when ``<docs>/provider_models.yaml`` is not a regular file.
    """
    if exception is not None:
        return

    # Only copy for HTML-like builders where app.outdir is a website root.
    if getattr(app.builder, "format", None) != "html":
        return

    # Source path: provider_models.yaml is copied into the Docker image at /docs/provider_models.yaml
    # This follows the pattern used for config templates like envoy.template.yaml and arch_config_schema.yaml
    docs_root = Path(app.srcdir).parent  # Goes from source/ to docs/
    source_path = docs_root / "provider_models.yaml"

    # Silently skip if the source file doesn't exist. is_file() (rather than
    # exists()) also skips a directory of that name, which would otherwise
    # make shutil.copy2 raise and fail the docs build.
    if not source_path.is_file():
        return

    # Per repo convention, place generated artifacts under an `includes/` folder.
    out_path = Path(app.outdir) / "includes" / "provider_models.yaml"
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # copy2 also carries over file metadata (e.g. timestamps) where possible.
    shutil.copy2(source_path, out_path)
|
||||
|
||||
|
||||
def setup(app: Sphinx) -> dict[str, object]:
    """Hook this extension into Sphinx.

    Connects ``_on_build_finished`` to the ``build-finished`` event and
    returns the extension metadata: its version plus flags declaring it
    safe for parallel reading and parallel writing.
    """
    app.connect("build-finished", _on_build_finished)

    metadata: dict[str, object] = dict(
        version="0.1.0",
        parallel_read_safe=True,
        parallel_write_safe=True,
    )
    return metadata
|
||||
|
|
@ -20,6 +20,7 @@ Connect to any combination of providers simultaneously (see :ref:`supported_prov
|
|||
|
||||
- First-Class Providers: Native integrations with OpenAI, Anthropic, DeepSeek, Mistral, Groq, Google Gemini, Together AI, xAI, Azure OpenAI, and Ollama
|
||||
- OpenAI-Compatible Providers: Any provider implementing the OpenAI Chat Completions API standard
|
||||
- Wildcard Model Configuration: Automatically configure all models from a provider using ``provider/*`` syntax
|
||||
|
||||
**Intelligent Routing**
|
||||
Three powerful routing approaches to optimize model selection:
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ All providers are configured in the ``llm_providers`` section of your ``plano_co
|
|||
|
||||
**Common Configuration Fields:**
|
||||
|
||||
- ``model``: Provider prefix and model name (format: ``provider/model-name``)
|
||||
- ``model``: Provider prefix and model name (format: ``provider/model-name`` or ``provider/*`` for wildcard expansion)
|
||||
- ``access_key``: API key for authentication (supports environment variables)
|
||||
- ``default``: Mark a model as the default (optional, boolean)
|
||||
- ``name``: Custom name for the provider instance (optional)
|
||||
|
|
@ -108,7 +108,11 @@ OpenAI
|
|||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
# Latest models (examples - use any OpenAI chat model)
|
||||
# Configure all OpenAI models with wildcard
|
||||
- model: openai/*
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Or configure specific models
|
||||
- model: openai/gpt-5.2
|
||||
access_key: $OPENAI_API_KEY
|
||||
default: true
|
||||
|
|
@ -116,7 +120,6 @@ OpenAI
|
|||
- model: openai/gpt-5
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Use any model name from OpenAI's API
|
||||
- model: openai/gpt-4o
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
|
|
@ -156,17 +159,29 @@ Anthropic
|
|||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
# Latest models (examples - use any Anthropic chat model)
|
||||
# Configure all Anthropic models with wildcard
|
||||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Or configure specific models
|
||||
- model: anthropic/claude-opus-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Use any model name from Anthropic's API
|
||||
- model: anthropic/claude-haiku-4-5
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Override specific model with custom routing
|
||||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_PROD_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
|
||||
DeepSeek
|
||||
~~~~~~~~
|
||||
|
||||
|
|
@ -694,6 +709,93 @@ Configure multiple instances of the same provider:
|
|||
access_key: $OPENAI_DEV_KEY
|
||||
name: openai-dev
|
||||
|
||||
Wildcard Model Configuration
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Automatically configure all available models from a provider using wildcard patterns. For known providers, Plano expands wildcards at configuration load time to include every model listed for that provider in Plano's provider registry.
|
||||
|
||||
**Basic Wildcard Usage:**
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
# Expand to all OpenAI models
|
||||
- model: openai/*
|
||||
access_key: $OPENAI_API_KEY
|
||||
|
||||
# Expand to all Anthropic Claude models
|
||||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Expand to all Mistral models
|
||||
- model: mistral/*
|
||||
access_key: $MISTRAL_API_KEY
|
||||
|
||||
**How Wildcards Work:**
|
||||
|
||||
1. **Known Providers** (OpenAI, Anthropic, DeepSeek, Mistral, Groq, Gemini, Together AI, xAI, Moonshot, Zhipu):
|
||||
|
||||
- Expands at config load time to all models in Plano's provider registry
|
||||
- Creates entries for both canonical (``openai/gpt-4``) and short names (``gpt-4``)
|
||||
- Enables the ``/v1/models`` endpoint to list all available models
|
||||
- **View complete model list**: `provider_models.yaml <../../includes/provider_models.yaml>`_
|
||||
|
||||
2. **Unknown/Custom Providers** (e.g., ``custom-provider/*``):
|
||||
|
||||
- Stores as a wildcard pattern for runtime matching
|
||||
- Requires ``base_url`` and ``provider_interface`` configuration
|
||||
- Matches model requests dynamically (e.g., ``custom-provider/any-model-name``)
|
||||
- Does not appear in ``/v1/models`` endpoint
|
||||
|
||||
**Overriding Wildcard Models:**
|
||||
|
||||
You can configure specific models with custom settings even when using wildcards. Specific configurations take precedence and are excluded from wildcard expansion:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
# Expand to all Anthropic models
|
||||
- model: anthropic/*
|
||||
access_key: $ANTHROPIC_API_KEY
|
||||
|
||||
# Override specific model with custom settings
|
||||
# This model will NOT be included in the wildcard expansion above
|
||||
- model: anthropic/claude-sonnet-4-20250514
|
||||
access_key: $ANTHROPIC_PROD_API_KEY
|
||||
routing_preferences:
|
||||
- name: code_generation
|
||||
priority: 1
|
||||
|
||||
# Another specific override
|
||||
- model: anthropic/claude-3-haiku-20240307
|
||||
access_key: $ANTHROPIC_DEV_API_KEY
|
||||
|
||||
**Custom Provider Wildcards:**
|
||||
|
||||
For providers not in Plano's registry, wildcards enable dynamic model routing:
|
||||
|
||||
.. code-block:: yaml
|
||||
|
||||
llm_providers:
|
||||
# Custom LiteLLM deployment
|
||||
- model: litellm/*
|
||||
base_url: https://litellm.example.com
|
||||
provider_interface: openai
|
||||
passthrough_auth: true
|
||||
|
||||
# Custom provider with all models
|
||||
- model: custom-provider/*
|
||||
access_key: $CUSTOM_API_KEY
|
||||
base_url: https://api.custom-provider.com
|
||||
provider_interface: openai
|
||||
|
||||
**Benefits:**
|
||||
|
||||
- **Simplified Configuration**: One line instead of listing dozens of models
|
||||
- **Future-Proof**: Automatically picks up new models as they are added to Plano's provider registry, with no configuration changes
|
||||
- **Flexible Overrides**: Customize specific models while using wildcards for others
|
||||
- **Selective Expansion**: Control which models get custom configurations
|
||||
|
||||
Default Model Configuration
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ extensions = [
|
|||
"sphinx_design",
|
||||
# Local extensions
|
||||
"llms_txt",
|
||||
"provider_models",
|
||||
]
|
||||
|
||||
# Paths that contain templates, relative to this directory.
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ Step 3.1: Using curl command
|
|||
.. code-block:: bash
|
||||
|
||||
$ curl --header 'Content-Type: application/json' \
|
||||
--data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "none"}' \
|
||||
--data '{"messages": [{"role": "user","content": "What is the capital of France?"}], "model": "gpt-4o"}' \
|
||||
http://localhost:12000/v1/chat/completions
|
||||
|
||||
{
|
||||
|
|
@ -315,7 +315,7 @@ Here is a sample curl command you can use to interact:
|
|||
.. code-block:: bash
|
||||
|
||||
$ curl --header 'Content-Type: application/json' \
|
||||
--data '{"messages": [{"role": "user","content": "what is exchange rate for gbp"}], "model": "none"}' \
|
||||
--data '{"messages": [{"role": "user","content": "what is exchange rate for gbp"}], "model": "gpt-4o"}' \
|
||||
http://localhost:10000/v1/chat/completions | jq ".choices[0].message.content"
|
||||
|
||||
"As of the date provided in your context, December 5, 2024, the exchange rate for GBP (British Pound) from USD (United States Dollar) is 0.78558. This means that 1 USD is equivalent to 0.78558 GBP."
|
||||
|
|
@ -325,7 +325,7 @@ And to get the list of supported currencies:
|
|||
.. code-block:: bash
|
||||
|
||||
$ curl --header 'Content-Type: application/json' \
|
||||
--data '{"messages": [{"role": "user","content": "show me list of currencies that are supported for conversion"}], "model": "none"}' \
|
||||
--data '{"messages": [{"role": "user","content": "show me list of currencies that are supported for conversion"}], "model": "gpt-4o"}' \
|
||||
http://localhost:10000/v1/chat/completions | jq ".choices[0].message.content"
|
||||
|
||||
"Here is a list of the currencies that are supported for conversion from USD, along with their symbols:\n\n1. AUD - Australian Dollar\n2. BGN - Bulgarian Lev\n3. BRL - Brazilian Real\n4. CAD - Canadian Dollar\n5. CHF - Swiss Franc\n6. CNY - Chinese Renminbi Yuan\n7. CZK - Czech Koruna\n8. DKK - Danish Krone\n9. EUR - Euro\n10. GBP - British Pound\n11. HKD - Hong Kong Dollar\n12. HUF - Hungarian Forint\n13. IDR - Indonesian Rupiah\n14. ILS - Israeli New Sheqel\n15. INR - Indian Rupee\n16. ISK - Icelandic Króna\n17. JPY - Japanese Yen\n18. KRW - South Korean Won\n19. MXN - Mexican Peso\n20. MYR - Malaysian Ringgit\n21. NOK - Norwegian Krone\n22. NZD - New Zealand Dollar\n23. PHP - Philippine Peso\n24. PLN - Polish Złoty\n25. RON - Romanian Leu\n26. SEK - Swedish Krona\n27. SGD - Singapore Dollar\n28. THB - Thai Baht\n29. TRY - Turkish Lira\n30. USD - United States Dollar\n31. ZAR - South African Rand\n\nIf you want to convert USD to any of these currencies, you can select the one you are interested in."
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue