refactor(llm): move timing to dispatch layer; keep CLI eprintln

Add `tracing::info!` timing in `ProviderChain::complete()` — the single chokepoint for all LLM calls — so the CLI, the MCP server, and any future callers are covered automatically. The elapsed time is measured per provider attempt and logged when that attempt succeeds. The CLI keeps its compact `eprintln!` for user-facing output; MCP and structured-logging consumers get the info log.
2026-07-21 07:01:01 +02:00 · 2026-04-11 17:06:53 -04:00 · 2026-04-11 17:06:53 -04:00 · 042feb7887
commit 042feb7887
parent 534855955b
1 changed file with 3 additions and 2 deletions
--- a/crates/noxa-llm/src/chain.rs
+++ b/crates/noxa-llm/src/chain.rs
@ -2,7 +2,7 @@
 /// Default order: Ollama (local, free) -> OpenAI -> Anthropic.
 /// Only includes providers that are actually configured/available.
 use async_trait::async_trait;
-use tracing::{debug, warn};
+use tracing::{debug, info, warn};

 use crate::error::LlmError;
 use crate::provider::{CompletionRequest, LlmProvider};
@ -91,9 +91,10 @@ impl LlmProvider for ProviderChain {
        for provider in &self.providers {
            debug!(provider = provider.name(), "attempting completion");

+            let t = std::time::Instant::now();
            match provider.complete(request).await {
                Ok(response) => {
-                    debug!(provider = provider.name(), "completion succeeded");
+                    info!(provider = provider.name(), elapsed_ms = t.elapsed().as_millis(), "completion succeeded");
                    return Ok(response);
                }
                Err(e) => {