diff --git a/cli/planoai/templates/coding_agent_routing.yaml b/cli/planoai/templates/coding_agent_routing.yaml index e41db0c0..b0e40000 100644 --- a/cli/planoai/templates/coding_agent_routing.yaml +++ b/cli/planoai/templates/coding_agent_routing.yaml @@ -1,13 +1,6 @@ -version: v0.1 +version: v0.3.0 -listeners: - egress_traffic: - address: 0.0.0.0 - port: 12000 - message_format: openai - timeout: 30s - -llm_providers: +model_providers: # OpenAI Models - model: openai/gpt-5-2025-08-07 access_key: $OPENAI_API_KEY @@ -39,5 +32,10 @@ model_aliases: arch.claude.code.small.fast: target: claude-haiku-4-5 +listeners: + - type: model + name: model_listener + port: 12000 + tracing: random_sampling: 100 diff --git a/cli/planoai/templates/conversational_state_v1_responses.yaml b/cli/planoai/templates/conversational_state_v1_responses.yaml index afc40910..403278a9 100644 --- a/cli/planoai/templates/conversational_state_v1_responses.yaml +++ b/cli/planoai/templates/conversational_state_v1_responses.yaml @@ -1,25 +1,36 @@ -version: v0.1 +version: v0.3.0 -listeners: - egress_traffic: - address: 0.0.0.0 - port: 12000 - message_format: openai - timeout: 30s - -llm_providers: +agents: + - id: assistant + url: http://localhost:10510 +model_providers: # OpenAI Models - model: openai/gpt-5-mini-2025-08-07 access_key: $OPENAI_API_KEY default: true - # Anthropic Models + # Anthropic Models - model: anthropic/claude-sonnet-4-20250514 access_key: $ANTHROPIC_API_KEY +listeners: + - type: agent + name: conversation_service + port: 8001 + router: plano_orchestrator_v1 + agents: + - id: assistant + description: | + A conversational assistant that maintains context across multi-turn + conversations. It can answer follow-up questions, remember previous + context, and provide coherent responses in ongoing dialogues. + # State storage configuration for v1/responses API # Manages conversation state for multi-turn conversations state_storage: # Type: memory | postgres type: memory + +tracing: + random_sampling: 100 diff --git a/cli/planoai/templates/preference_aware_routing.yaml b/cli/planoai/templates/preference_aware_routing.yaml index cb9f685a..e38b3881 100644 --- a/cli/planoai/templates/preference_aware_routing.yaml +++ b/cli/planoai/templates/preference_aware_routing.yaml @@ -1,13 +1,6 @@ -version: v0.1.0 +version: v0.3.0 -listeners: - egress_traffic: - address: 0.0.0.0 - port: 12000 - message_format: openai - timeout: 30s - -llm_providers: +model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY @@ -25,5 +18,10 @@ llm_providers: - name: code generation description: generating new code snippets, functions, or boilerplate based on user prompts or requirements +listeners: + - type: model + name: model_listener + port: 12000 + tracing: random_sampling: 100 diff --git a/cli/test/test_init.py b/cli/test/test_init.py index b9665a2a..cfb7e6cb 100644 --- a/cli/test/test_init.py +++ b/cli/test/test_init.py @@ -26,7 +26,7 @@ def test_init_template_builtin_writes_config(tmp_path, monkeypatch): config_path = tmp_path / "config.yaml" assert config_path.exists() config_text = config_path.read_text(encoding="utf-8") - assert "llm_providers:" in config_text + assert "model_providers:" in config_text def test_init_refuses_overwrite_without_force(tmp_path, monkeypatch): diff --git a/demos/advanced/currency_exchange/config.yaml b/demos/advanced/currency_exchange/config.yaml index 064befa1..f99da77b 100644 --- a/demos/advanced/currency_exchange/config.yaml +++ b/demos/advanced/currency_exchange/config.yaml @@ -1,13 +1,11 @@ -version: v0.1.0 +version: v0.3.0 listeners: - ingress_traffic: - address: 0.0.0.0 + - type: prompt + name: prompt_listener port: 10000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true diff --git a/demos/advanced/currency_exchange/docker-compose.yaml b/demos/advanced/currency_exchange/docker-compose.yaml index bde2b082..ff161662 100644 --- a/demos/advanced/currency_exchange/docker-compose.yaml +++ b/demos/advanced/currency_exchange/docker-compose.yaml @@ -1,16 +1,20 @@ services: - chatbot_ui: - build: - context: ../../shared/chatbot_ui + anythingllm: + image: mintplexlabs/anythingllm + restart: always ports: - - "18080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - # this is only because we are running the sample app in the same docker container environemtn as archgw - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder extra_hosts: - "host.docker.internal:host-gateway" - volumes: - - ./config.yaml:/app/plano_config.yaml jaeger: build: diff --git a/demos/advanced/model_choice_test_harness/plano_config_with_aliases.yaml b/demos/advanced/model_choice_test_harness/plano_config_with_aliases.yaml index 3bd84b34..b25331e1 100644 --- a/demos/advanced/model_choice_test_harness/plano_config_with_aliases.yaml +++ b/demos/advanced/model_choice_test_harness/plano_config_with_aliases.yaml @@ -1,13 +1,11 @@ -version: v0.1.0 +version: v0.3.0 listeners: - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY default: true @@ -20,3 +18,6 @@ model_aliases: target: gpt-4o-mini arch.reason.v1: target: o3 + +tracing: + random_sampling: 100 diff --git a/demos/advanced/multi_turn_rag/config.yaml b/demos/advanced/multi_turn_rag/config.yaml index a29622ec..2c677eec 100644 --- a/demos/advanced/multi_turn_rag/config.yaml +++ b/demos/advanced/multi_turn_rag/config.yaml @@ -1,18 +1,16 @@ -version: v0.1.0 +version: v0.3.0 listeners: - ingress_traffic: - address: 0.0.0.0 + - type: prompt + name: prompt_listener port: 10000 - message_format: openai - timeout: 30s endpoints: rag_energy_source_agent: endpoint: host.docker.internal:18083 connect_timeout: 0.005s -llm_providers: +model_providers: - access_key: $OPENAI_API_KEY model: openai/gpt-4o-mini default: true diff --git a/demos/advanced/multi_turn_rag/docker-compose.yaml b/demos/advanced/multi_turn_rag/docker-compose.yaml index 4232ff9e..1c3ed73c 100644 --- a/demos/advanced/multi_turn_rag/docker-compose.yaml +++ b/demos/advanced/multi_turn_rag/docker-compose.yaml @@ -10,15 +10,19 @@ services: interval: 5s retries: 20 - chatbot_ui: - build: - context: ../../shared/chatbot_ui - dockerfile: Dockerfile + anythingllm: + image: mintplexlabs/anythingllm + restart: always ports: - - "18080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder extra_hosts: - "host.docker.internal:host-gateway" - volumes: - - ./config.yaml:/app/plano_config.yaml diff --git a/demos/advanced/stock_quote/config.yaml b/demos/advanced/stock_quote/config.yaml index 5cd20a44..bef46082 100644 --- a/demos/advanced/stock_quote/config.yaml +++ b/demos/advanced/stock_quote/config.yaml @@ -1,13 +1,11 @@ -version: v0.1.0 +version: v0.3.0 listeners: - ingress_traffic: - address: 0.0.0.0 + - type: prompt + name: prompt_listener port: 10000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: - access_key: $OPENAI_API_KEY model: openai/gpt-4o diff --git a/demos/advanced/stock_quote/docker-compose.yaml b/demos/advanced/stock_quote/docker-compose.yaml index 6e02922d..ff161662 100644 --- a/demos/advanced/stock_quote/docker-compose.yaml +++ b/demos/advanced/stock_quote/docker-compose.yaml @@ -1,16 +1,20 @@ services: - chatbot_ui: - build: - context: ../../shared/chatbot_ui + anythingllm: + image: mintplexlabs/anythingllm + restart: always ports: - - "18080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - # this is only because we are running the sample app in the same docker container environment as archgw - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder extra_hosts: - "host.docker.internal:host-gateway" - volumes: - - ./config.yaml:/app/plano_config.yaml jaeger: build: diff --git a/demos/getting_started/llm_gateway/docker-compose.yaml b/demos/getting_started/llm_gateway/docker-compose.yaml index 784578d0..52723fbf 100644 --- a/demos/getting_started/llm_gateway/docker-compose.yaml +++ b/demos/getting_started/llm_gateway/docker-compose.yaml @@ -37,13 +37,3 @@ services: - "16686:16686" - "4317:4317" - "4318:4318" - - prometheus: - build: - context: ../../shared/prometheus - - grafana: - build: - context: ../../shared/grafana - ports: - - "3000:3000" diff --git a/demos/getting_started/weather_forecast/config.yaml b/demos/getting_started/weather_forecast/config.yaml index eba38764..69451552 100644 --- a/demos/getting_started/weather_forecast/config.yaml +++ b/demos/getting_started/weather_forecast/config.yaml @@ -1,17 +1,13 @@ -version: v0.1.0 +version: v0.3.0 listeners: - ingress_traffic: - address: 0.0.0.0 + - type: prompt + name: prompt_listener port: 10000 - message_format: openai - timeout: 30s - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s endpoints: weather_forecast_service: @@ -22,7 +18,7 @@ overrides: # confidence threshold for prompt target intent matching prompt_target_intent_matching_threshold: 0.6 -llm_providers: +model_providers: - access_key: $GROQ_API_KEY model: groq/llama-3.2-3b-preview diff --git a/demos/getting_started/weather_forecast/docker-compose.yaml b/demos/getting_started/weather_forecast/docker-compose.yaml index 36c2e74a..84074ab9 100644 --- a/demos/getting_started/weather_forecast/docker-compose.yaml +++ b/demos/getting_started/weather_forecast/docker-compose.yaml @@ -9,15 +9,19 @@ services: ports: - "18083:80" - chatbot_ui: - build: - context: ../../shared/chatbot_ui + anythingllm: + image: mintplexlabs/anythingllm + restart: always ports: - - "18080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - # this is only because we are running the sample app in the same docker container environemtn as archgw - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder extra_hosts: - "host.docker.internal:host-gateway" - volumes: - - ./config.yaml:/app/plano_config.yaml diff --git a/demos/integrations/ollama/config.yaml b/demos/integrations/ollama/config.yaml index 911af238..c86fe002 100644 --- a/demos/integrations/ollama/config.yaml +++ b/demos/integrations/ollama/config.yaml @@ -1,13 +1,11 @@ -version: v0.1.0 +version: v0.3.0 listeners: - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: - model: my_llm_provider/llama3.2 provider_interface: openai diff --git a/demos/integrations/ollama/docker-compose.yaml b/demos/integrations/ollama/docker-compose.yaml index 3f123d84..c272dbca 100644 --- a/demos/integrations/ollama/docker-compose.yaml +++ b/demos/integrations/ollama/docker-compose.yaml @@ -1,16 +1,20 @@ services: - chatbot_ui: - build: - context: ../../shared/chatbot_ui + anythingllm: + image: mintplexlabs/anythingllm + restart: always ports: - - "18080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - # this is only because we are running the sample app in the same docker container environemtn as archgw - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:12000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:12000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder extra_hosts: - "host.docker.internal:host-gateway" - volumes: - - ./config.yaml:/app/plano_config.yaml jaeger: build: diff --git a/demos/integrations/spotify_bearer_auth/config.yaml b/demos/integrations/spotify_bearer_auth/config.yaml index b848d718..5b1f82a9 100644 --- a/demos/integrations/spotify_bearer_auth/config.yaml +++ b/demos/integrations/spotify_bearer_auth/config.yaml @@ -1,11 +1,9 @@ -version: v0.1.0 +version: v0.3.0 listeners: - ingress_traffic: - address: 0.0.0.0 + - type: prompt + name: prompt_listener port: 10000 - message_format: openai - timeout: 30s overrides: optimize_context_window: true @@ -84,14 +82,14 @@ system_prompt: | Make sure your output is valid Markdown. And don't say "formatted in Markdown". Thanks! -llm_providers: +model_providers: - access_key: $OPENAI_API_KEY model: openai/gpt-4o default: true prompt_targets: - name: get_new_releases - description: Get a list of new album releases featured in Spotify (shown, for example, on a Spotify player’s “Browse” tab). + description: Get a list of new album releases featured in Spotify (shown, for example, on a Spotify player's "Browse" tab). parameters: - name: country description: the country where the album is released @@ -121,3 +119,6 @@ prompt_targets: path: /v1/artists/{artist_id}/top-tracks http_headers: Authorization: "Bearer $SPOTIFY_CLIENT_KEY" + +tracing: + random_sampling: 100 diff --git a/demos/integrations/spotify_bearer_auth/docker-compose.yaml b/demos/integrations/spotify_bearer_auth/docker-compose.yaml index bde2b082..ff161662 100644 --- a/demos/integrations/spotify_bearer_auth/docker-compose.yaml +++ b/demos/integrations/spotify_bearer_auth/docker-compose.yaml @@ -1,16 +1,20 @@ services: - chatbot_ui: - build: - context: ../../shared/chatbot_ui + anythingllm: + image: mintplexlabs/anythingllm + restart: always ports: - - "18080:8080" + - "3001:3001" + cap_add: + - SYS_ADMIN environment: - # this is only because we are running the sample app in the same docker container environemtn as archgw - - CHAT_COMPLETION_ENDPOINT=http://host.docker.internal:10000/v1 + - STORAGE_DIR=/app/server/storage + - LLM_PROVIDER=generic-openai + - GENERIC_OPEN_AI_BASE_PATH=http://host.docker.internal:10000/v1 + - GENERIC_OPEN_AI_MODEL_PREF=gpt-4o-mini + - GENERIC_OPEN_AI_MODEL_TOKEN_LIMIT=128000 + - GENERIC_OPEN_AI_API_KEY=sk-placeholder extra_hosts: - "host.docker.internal:host-gateway" - volumes: - - ./config.yaml:/app/plano_config.yaml jaeger: build: diff --git a/demos/llm_routing/claude_code_router/config.yaml b/demos/llm_routing/claude_code_router/config.yaml index e41db0c0..be763325 100644 --- a/demos/llm_routing/claude_code_router/config.yaml +++ b/demos/llm_routing/claude_code_router/config.yaml @@ -1,13 +1,11 @@ -version: v0.1 +version: v0.3.0 listeners: - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: # OpenAI Models - model: openai/gpt-5-2025-08-07 access_key: $OPENAI_API_KEY diff --git a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml index 891fa4fe..53b679ae 100644 --- a/demos/llm_routing/model_alias_routing/config_with_aliases.yaml +++ b/demos/llm_routing/model_alias_routing/config_with_aliases.yaml @@ -1,13 +1,11 @@ -version: v0.1 +version: v0.3.0 listeners: - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: # OpenAI Models - model: openai/gpt-5-mini-2025-08-07 @@ -95,3 +93,6 @@ model_aliases: # Alias for grok testing arch.grok.v1: target: grok-4-0709 + +tracing: + random_sampling: 100 diff --git a/demos/llm_routing/preference_based_routing/config.yaml b/demos/llm_routing/preference_based_routing/config.yaml index cb9f685a..38e8920a 100644 --- a/demos/llm_routing/preference_based_routing/config.yaml +++ b/demos/llm_routing/preference_based_routing/config.yaml @@ -1,13 +1,11 @@ -version: v0.1.0 +version: v0.3.0 listeners: - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: - model: openai/gpt-4o-mini access_key: $OPENAI_API_KEY diff --git a/demos/llm_routing/preference_based_routing/docker-compose.yaml b/demos/llm_routing/preference_based_routing/docker-compose.yaml index 37af57e2..7c88594a 100644 --- a/demos/llm_routing/preference_based_routing/docker-compose.yaml +++ b/demos/llm_routing/preference_based_routing/docker-compose.yaml @@ -40,13 +40,3 @@ services: - "16686:16686" - "4317:4317" - "4318:4318" - - # prometheus: - # build: - # context: ../../shared/prometheus - - # grafana: - # build: - # context: ../../shared/grafana - # ports: - # - "3000:3000" diff --git a/demos/llm_routing/preference_based_routing/plano_config_local.yaml b/demos/llm_routing/preference_based_routing/plano_config_local.yaml index b965cd0c..0a3db8bf 100644 --- a/demos/llm_routing/preference_based_routing/plano_config_local.yaml +++ b/demos/llm_routing/preference_based_routing/plano_config_local.yaml @@ -1,17 +1,15 @@ -version: v0.1.0 +version: v0.3.0 routing: model: Arch-Router llm_provider: arch-router listeners: - egress_traffic: - address: 0.0.0.0 + - type: model + name: model_listener port: 12000 - message_format: openai - timeout: 30s -llm_providers: +model_providers: - name: arch-router model: arch/hf.co/katanemo/Arch-Router-1.5B.gguf:Q4_K_M