""" LLM pricing models for different providers. Prices are per 1000 tokens for most models, with some newer models priced per million tokens. """ from decimal import Decimal from typing import Dict from api.services.configuration.registry import ServiceProviders from .models import TokenPricingModel # LLM pricing registry LLM_PRICING: Dict[str, Dict[str, TokenPricingModel]] = { ServiceProviders.OPENAI: { "gpt-3.5-turbo": TokenPricingModel( prompt_token_price=Decimal("0.0015") / 1000, # $0.0015 per 1K tokens completion_token_price=Decimal("0.002") / 1000, # $0.002 per 1K tokens ), "gpt-4": TokenPricingModel( prompt_token_price=Decimal("0.03") / 1000, # $0.03 per 1K tokens completion_token_price=Decimal("0.06") / 1000, # $0.06 per 1K tokens ), "gpt-4.1": TokenPricingModel( prompt_token_price=Decimal("2.00") / 1000000, # $2.00 per 1M tokens completion_token_price=Decimal("8.00") / 1000000, # $8.00 per 1M tokens ), "gpt-4.1-mini": TokenPricingModel( prompt_token_price=Decimal("0.40") / 1000000, # $0.40 per 1M tokens completion_token_price=Decimal("1.60") / 1000000, # $1.60 per 1M tokens ), "gpt-4.1-nano": TokenPricingModel( prompt_token_price=Decimal("0.10") / 1000000, # $0.10 per 1M tokens completion_token_price=Decimal("0.40") / 1000000, # $0.40 per 1M tokens ), "gpt-4.5-preview": TokenPricingModel( prompt_token_price=Decimal("75.00") / 1000000, # $75.00 per 1M tokens completion_token_price=Decimal("150.00") / 1000000, # $150.00 per 1M tokens ), "gpt-4o": TokenPricingModel( prompt_token_price=Decimal("2.50") / 1000000, # $2.50 per 1M tokens - FIXED completion_token_price=Decimal("10.00") / 1000000, # $10.00 per 1M tokens - FIXED ), "gpt-4o-audio-preview": TokenPricingModel( prompt_token_price=Decimal("2.50") / 1000000, # $2.50 per 1M tokens completion_token_price=Decimal("10.00") / 1000000, # $10.00 per 1M tokens ), "gpt-4o-realtime-preview": TokenPricingModel( prompt_token_price=Decimal("5.00") / 1000000, # $5.00 per 1M tokens completion_token_price=Decimal("20.00") / 1000000, # $20.00 per 1M tokens ), "gpt-4o-mini": TokenPricingModel( prompt_token_price=Decimal("0.15") / 1000000, # $0.15 per 1M tokens completion_token_price=Decimal("0.60") / 1000000, # $0.60 per 1M tokens ), "gpt-4o-mini-audio-preview": TokenPricingModel( prompt_token_price=Decimal("0.15") / 1000000, # $0.15 per 1M tokens completion_token_price=Decimal("0.60") / 1000000, # $0.60 per 1M tokens ), "gpt-4o-mini-realtime-preview": TokenPricingModel( prompt_token_price=Decimal("0.60") / 1000000, # $0.60 per 1M tokens completion_token_price=Decimal("2.40") / 1000000, # $2.40 per 1M tokens ), "gpt-4o-search-preview": TokenPricingModel( prompt_token_price=Decimal("2.50") / 1000000, # $2.50 per 1M tokens completion_token_price=Decimal("10.00") / 1000000, # $10.00 per 1M tokens ), "gpt-4o-mini-search-preview": TokenPricingModel( prompt_token_price=Decimal("0.15") / 1000000, # $0.15 per 1M tokens completion_token_price=Decimal("0.60") / 1000000, # $0.60 per 1M tokens ), "o1": TokenPricingModel( prompt_token_price=Decimal("15.00") / 1000000, # $15.00 per 1M tokens completion_token_price=Decimal("60.00") / 1000000, # $60.00 per 1M tokens ), "o1-pro": TokenPricingModel( prompt_token_price=Decimal("150.00") / 1000000, # $150.00 per 1M tokens completion_token_price=Decimal("600.00") / 1000000, # $600.00 per 1M tokens ), "o1-mini": TokenPricingModel( prompt_token_price=Decimal("1.10") / 1000000, # $1.10 per 1M tokens completion_token_price=Decimal("4.40") / 1000000, # $4.40 per 1M tokens ), "o3": TokenPricingModel( prompt_token_price=Decimal("10.00") / 1000000, # $10.00 per 1M tokens completion_token_price=Decimal("40.00") / 1000000, # $40.00 per 1M tokens ), "o3-mini": TokenPricingModel( prompt_token_price=Decimal("1.10") / 1000000, # $1.10 per 1M tokens completion_token_price=Decimal("4.40") / 1000000, # $4.40 per 1M tokens ), "o4-mini": TokenPricingModel( prompt_token_price=Decimal("1.10") / 1000000, # $1.10 per 1M tokens completion_token_price=Decimal("4.40") / 1000000, # $4.40 per 1M tokens ), "computer-use-preview": TokenPricingModel( prompt_token_price=Decimal("3.00") / 1000000, # $3.00 per 1M tokens completion_token_price=Decimal("12.00") / 1000000, # $12.00 per 1M tokens ), "gpt-image-1": TokenPricingModel( prompt_token_price=Decimal("5.00") / 1000000, # $5.00 per 1M tokens completion_token_price=Decimal("0") / 1000000, # No output pricing shown ), "codex-mini-latest": TokenPricingModel( prompt_token_price=Decimal("1.50") / 1000000, # $1.50 per 1M tokens completion_token_price=Decimal("6.00") / 1000000, # $6.00 per 1M tokens ), # Transcription models "gpt-4o-transcribe": TokenPricingModel( prompt_token_price=Decimal("2.50") / 1000000, # $2.50 per 1M tokens completion_token_price=Decimal("10.00") / 1000000, # $10.00 per 1M tokens ), "gpt-4o-mini-transcribe": TokenPricingModel( prompt_token_price=Decimal("1.25") / 1000000, # $1.25 per 1M tokens completion_token_price=Decimal("5.00") / 1000000, # $5.00 per 1M tokens ), # TTS models with token-based pricing "gpt-4o-mini-tts": TokenPricingModel( prompt_token_price=Decimal("0.60") / 1000000, # $0.60 per 1M tokens completion_token_price=Decimal("0") / 1000000, # No completion tokens for TTS ), }, ServiceProviders.GROQ: { "llama-3.3-70b-versatile": TokenPricingModel( prompt_token_price=Decimal("0.00059") / 1000, # $0.00059 per 1K tokens completion_token_price=Decimal("0.00079") / 1000, # $0.00079 per 1K tokens ), "deepseek-r1-distill-llama-70b": TokenPricingModel( prompt_token_price=Decimal("0.00059") / 1000, # Assuming similar pricing completion_token_price=Decimal("0.00079") / 1000, ), }, ServiceProviders.AZURE: { "gpt-4.1-mini": TokenPricingModel( prompt_token_price=Decimal("0.44") / 1000000, # $0.40 per 1M tokens completion_token_price=Decimal("8.80") / 1000000, # $1.60 per 1M tokens if using data zone ) }, }