feat: unified credits and its cost calculations

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2026-05-02 14:34:23 -07:00
parent 451a98936e
commit ae9d36d77f
61 changed files with 5835 additions and 272 deletions

View file

@ -54,11 +54,15 @@ STRIPE_PAGES_PER_UNIT=1000
# Set FALSE to disable new checkout session creation temporarily
STRIPE_PAGE_BUYING_ENABLED=TRUE
# Premium token purchases via Stripe (for premium-tier model usage)
# Set TRUE to allow users to buy premium token packs ($1 per 1M tokens)
# Premium credit purchases via Stripe (for premium-tier model usage).
# Each pack grants STRIPE_CREDIT_MICROS_PER_UNIT micro-USD of credit
# (default 1_000_000 = $1.00). Premium turns are billed at the actual
# per-call provider cost reported by LiteLLM.
STRIPE_TOKEN_BUYING_ENABLED=FALSE
STRIPE_PREMIUM_TOKEN_PRICE_ID=price_...
STRIPE_TOKENS_PER_UNIT=1000000
STRIPE_CREDIT_MICROS_PER_UNIT=1000000
# DEPRECATED — use STRIPE_CREDIT_MICROS_PER_UNIT (1:1 numerical mapping):
# STRIPE_TOKENS_PER_UNIT=1000000
# Periodic Stripe safety net for purchases left in PENDING (minutes old)
STRIPE_RECONCILIATION_LOOKBACK_MINUTES=10
@ -184,9 +188,35 @@ VIDEO_PRESENTATION_DEFAULT_DURATION_IN_FRAMES=300
# (Optional) Maximum pages limit per user for ETL services (default: `999999999` for unlimited in OSS version)
PAGES_LIMIT=500
# Premium token quota per registered user (default: 3,000,000)
# Applies only to models with billing_tier=premium in global_llm_config.yaml
PREMIUM_TOKEN_LIMIT=3000000
# Premium credit quota per registered user, in micro-USD
# (default: 5,000,000 == $5.00 of credit). Premium turns are debited at the
# actual per-call provider cost reported by LiteLLM, so cheap and expensive
# models bill proportionally. Applies only to models with
# billing_tier=premium in global_llm_config.yaml.
PREMIUM_CREDIT_MICROS_LIMIT=5000000
# DEPRECATED — use PREMIUM_CREDIT_MICROS_LIMIT (1:1 numerical mapping):
# PREMIUM_TOKEN_LIMIT=5000000
# Safety ceiling on per-call premium reservation, in micro-USD.
# stream_new_chat estimates an upper-bound cost from the model's
# litellm-published per-token rates × the config's quota_reserve_tokens
# and clamps to this value so a misconfigured model can't lock the
# user's whole balance on one call. Default $1.00.
QUOTA_MAX_RESERVE_MICROS=1000000
# Per-image reservation (in micro-USD) for the POST /image-generations
# endpoint. Bypassed for free configs. Default $0.05.
QUOTA_DEFAULT_IMAGE_RESERVE_MICROS=50000
# Per-podcast reservation (in micro-USD) used by the podcast Celery task.
# Single envelope covers one transcript-generation LLM call. Default $0.20.
QUOTA_DEFAULT_PODCAST_RESERVE_MICROS=200000
# Per-video-presentation reservation (in micro-USD) used by the video
# presentation Celery task. Covers worst-case fan-out of N slide-scene
# generations + refines. Default $1.00. NOTE: tasks using the override
# path bypass the QUOTA_MAX_RESERVE_MICROS clamp — raise with care.
QUOTA_DEFAULT_VIDEO_PRESENTATION_RESERVE_MICROS=1000000
# No-login (anonymous) mode — allows public users to chat without an account
# Set TRUE to enable /free pages and anonymous chat API