feat: Budget-aware model selection — book cost ceiling with per-role cost calculations
This commit is contained in:
92
ai/setup.py
92
ai/setup.py
@@ -74,47 +74,55 @@ def select_best_models(force_refresh=False):
|
|||||||
model = genai.GenerativeModel(bootstrapper)
|
model = genai.GenerativeModel(bootstrapper)
|
||||||
prompt = f"""
|
prompt = f"""
|
||||||
ROLE: AI Model Architect
|
ROLE: AI Model Architect
|
||||||
TASK: Select the optimal Gemini models for a book-writing application. Prefer newer Gemini 2.x models when available.
|
TASK: Select the optimal Gemini models for a book-writing application.
|
||||||
|
PRIMARY OBJECTIVE: Keep total book generation cost under $2.00. Quality is secondary to this budget.
|
||||||
|
|
||||||
AVAILABLE_MODELS:
|
AVAILABLE_MODELS:
|
||||||
{json.dumps(compatible)}
|
{json.dumps(compatible)}
|
||||||
|
|
||||||
PRICING_CONTEXT (USD per 1M tokens, approximate — TIER determines preference):
|
PRICING_CONTEXT (USD per 1M tokens — use these to calculate actual book cost):
|
||||||
- FREE TIER: Models with 'exp', 'beta', or 'preview' in name are typically free experimental tiers.
|
- FREE TIER: Any model with 'exp', 'beta', or 'preview' in name = $0.00. Always prefer these.
|
||||||
e.g. gemini-2.0-pro-exp = FREE. Use these whenever they exist.
|
e.g. gemini-2.0-pro-exp = FREE, gemini-2.5-pro-preview = FREE.
|
||||||
- Gemini 2.5 Flash: ~$0.075 Input / $0.30 Output. Fast and very capable.
|
- gemini-2.5-flash / gemini-2.5-flash-preview: ~$0.075 Input / $0.30 Output.
|
||||||
- Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. Cost-effective, excellent quality.
|
- gemini-2.0-flash: ~$0.10 Input / $0.40 Output.
|
||||||
- Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. Legacy, still reliable.
|
- gemini-1.5-flash: ~$0.075 Input / $0.30 Output.
|
||||||
- Gemini 2.5 Pro (stable/latest): ~$1.25+ Input / $5.00+ Output. EXPENSIVE — avoid unless free/exp.
|
- gemini-2.5-pro (stable, non-preview): ~$1.25 Input / $10.00 Output. BUDGET BREAKER.
|
||||||
- Gemini 1.5 Pro (stable): ~$1.25 Input / $5.00 Output. EXPENSIVE — avoid.
|
- gemini-1.5-pro (stable): ~$1.25 Input / $5.00 Output. BUDGET BREAKER.
|
||||||
|
|
||||||
CRITERIA (cost is the primary constraint):
|
BOOK TOKEN BUDGET (30-chapter novel — use this to calculate real cost before deciding):
|
||||||
- LOGIC: Needs JSON adherence, plot consistency, instruction following.
|
Logic role total: ~265,000 input tokens + ~55,000 output tokens
|
||||||
-> Prefer by EFFECTIVE COST: Free/Exp Pro (e.g. 2.0-pro-exp) > Flash 2.5 > Flash 2.0 > Flash 1.5 > paid Pro (AVOID)
|
(planning, state tracking, consistency checks, director treatments per chapter)
|
||||||
-> Rule: A free Pro beats Flash. A paid Pro loses to any Flash.
|
Writer role total: ~450,000 input tokens + ~135,000 output tokens
|
||||||
- WRITER: Needs creativity, prose quality, long-form text generation. Flash is sufficient for prose.
|
(drafting, evaluation, refinement per chapter — 2 passes max)
|
||||||
-> Prefer: Flash 2.5 > Flash 2.0 > Flash 1.5. Do NOT use Pro — Flash quality is adequate for fiction writing.
|
Artist role total: ~20,000 input tokens + ~5,000 output tokens (cover + marketing copy)
|
||||||
- ARTIST: Needs visual description and prompt quality for cover art design.
|
|
||||||
-> Prefer: Flash 2.0 > Flash 1.5 (speed and visual understanding)
|
|
||||||
|
|
||||||
CONSTRAINTS:
|
COST FORMULA: cost = (input_tokens / 1,000,000 * input_price) + (output_tokens / 1,000,000 * output_price)
|
||||||
- A free/experimental Pro model is ALWAYS preferred over Flash for the Logic role.
|
HARD BUDGET: Logic_cost + Writer_cost + Artist_cost must be < $2.00 total.
|
||||||
- Flash is ALWAYS preferred over a paid Pro model for ALL roles.
|
|
||||||
- Strongly prefer Gemini 2.x over 1.5 where available.
|
SELECTION RULES (apply in order):
|
||||||
- 'thinking' models are too slow/expensive for any role.
|
1. FREE FIRST: If a free/exp model exists (any tier, any quality), pick it for Logic. Cost = $0.
|
||||||
- Provide a ranking of ALL available models from best to worst overall.
|
2. FLASH FOR WRITER: Flash is sufficient for fiction prose. Never pick a paid Pro for Writer.
|
||||||
|
3. CALCULATE: For non-free models, compute the actual book cost using the token budget above.
|
||||||
|
Reject any combination that exceeds $2.00 total.
|
||||||
|
4. QUALITY TIEBREAK: Among models with similar cost, prefer newer generation (2.x > 1.5).
|
||||||
|
5. NO THINKING MODELS: Too slow and expensive for any role.
|
||||||
|
|
||||||
|
ROLES:
|
||||||
|
- LOGIC: Planning, JSON adherence, plot consistency. Free/exp Pro ideal; Flash acceptable.
|
||||||
|
- WRITER: Creative prose, chapter drafting. Flash 2.x is sufficient — do NOT use paid Pro.
|
||||||
|
- ARTIST: Visual prompts for cover art. Cheapest capable Flash model.
|
||||||
|
- PRO_REWRITE: Emergency full-chapter rewrite (rare, ~1-2x per book). Best free/exp Pro available.
|
||||||
|
If no free Pro exists, use best Flash — do not use paid Pro even here.
|
||||||
|
|
||||||
OUTPUT_FORMAT (JSON only, no markdown):
|
OUTPUT_FORMAT (JSON only, no markdown):
|
||||||
{{
|
{{
|
||||||
"logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
"logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M", "book_cost": "$X.XX" }},
|
||||||
"writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
"writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M", "book_cost": "$X.XX" }},
|
||||||
"artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
"artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M", "book_cost": "$X.XX" }},
|
||||||
"pro_rewrite": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
"pro_rewrite": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M", "book_cost": "$X.XX" }},
|
||||||
|
"total_estimated_book_cost": "$X.XX",
|
||||||
"ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ]
|
"ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ]
|
||||||
}}
|
}}
|
||||||
|
|
||||||
NOTE: "pro_rewrite" is the best available Pro model for rare, critical chapter rewrites.
|
|
||||||
Prefer free/experimental Pro (e.g. gemini-2.0-pro-exp) over paid Pro. If no Pro exists, use best Flash.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -182,20 +190,27 @@ def init_models(force=False):
|
|||||||
if not force:
|
if not force:
|
||||||
missing_costs = False
|
missing_costs = False
|
||||||
for role in ['logic', 'writer', 'artist']:
|
for role in ['logic', 'writer', 'artist']:
|
||||||
if 'estimated_cost' not in selected_models.get(role, {}) or selected_models[role].get('estimated_cost') == 'N/A':
|
role_data = selected_models.get(role, {})
|
||||||
|
if 'estimated_cost' not in role_data or role_data.get('estimated_cost') == 'N/A':
|
||||||
missing_costs = True
|
missing_costs = True
|
||||||
|
if 'book_cost' not in role_data:
|
||||||
|
missing_costs = True
|
||||||
|
if 'total_estimated_book_cost' not in selected_models:
|
||||||
|
missing_costs = True
|
||||||
if missing_costs:
|
if missing_costs:
|
||||||
utils.log("SYSTEM", "⚠️ Missing cost info in cached models. Forcing refresh.")
|
utils.log("SYSTEM", "⚠️ Missing cost info in cached models. Forcing refresh.")
|
||||||
return init_models(force=True)
|
return init_models(force=True)
|
||||||
|
|
||||||
def get_model_details(role_data):
|
def get_model_details(role_data):
|
||||||
if isinstance(role_data, dict): return role_data.get('model'), role_data.get('estimated_cost', 'N/A')
|
if isinstance(role_data, dict):
|
||||||
return role_data, 'N/A'
|
return role_data.get('model'), role_data.get('estimated_cost', 'N/A'), role_data.get('book_cost', 'N/A')
|
||||||
|
return role_data, 'N/A', 'N/A'
|
||||||
|
|
||||||
logic_name, logic_cost = get_model_details(selected_models['logic'])
|
logic_name, logic_cost, logic_book = get_model_details(selected_models['logic'])
|
||||||
writer_name, writer_cost = get_model_details(selected_models['writer'])
|
writer_name, writer_cost, writer_book = get_model_details(selected_models['writer'])
|
||||||
artist_name, artist_cost = get_model_details(selected_models['artist'])
|
artist_name, artist_cost, artist_book = get_model_details(selected_models['artist'])
|
||||||
pro_name, pro_cost = get_model_details(selected_models.get('pro_rewrite', {'model': 'models/gemini-2.0-pro-exp', 'estimated_cost': 'Free'}))
|
pro_name, pro_cost, _ = get_model_details(selected_models.get('pro_rewrite', {'model': 'models/gemini-2.0-pro-exp', 'estimated_cost': 'Free', 'book_cost': '$0.00'}))
|
||||||
|
total_book_cost = selected_models.get('total_estimated_book_cost', 'N/A')
|
||||||
|
|
||||||
logic_name = logic_name if config.MODEL_LOGIC_HINT == "AUTO" else config.MODEL_LOGIC_HINT
|
logic_name = logic_name if config.MODEL_LOGIC_HINT == "AUTO" else config.MODEL_LOGIC_HINT
|
||||||
writer_name = writer_name if config.MODEL_WRITER_HINT == "AUTO" else config.MODEL_WRITER_HINT
|
writer_name = writer_name if config.MODEL_WRITER_HINT == "AUTO" else config.MODEL_WRITER_HINT
|
||||||
@@ -206,7 +221,8 @@ def init_models(force=False):
|
|||||||
models.artist_model_name = artist_name
|
models.artist_model_name = artist_name
|
||||||
models.pro_model_name = pro_name
|
models.pro_model_name = pro_name
|
||||||
|
|
||||||
utils.log("SYSTEM", f"Models: Logic={logic_name} ({logic_cost}) | Writer={writer_name} ({writer_cost}) | Artist={artist_name} | Pro-Rewrite={pro_name} ({pro_cost})")
|
utils.log("SYSTEM", f"Models: Logic={logic_name} ({logic_cost}, {logic_book}/book) | Writer={writer_name} ({writer_cost}, {writer_book}/book) | Artist={artist_name} | Pro-Rewrite={pro_name} ({pro_cost})")
|
||||||
|
utils.log("SYSTEM", f"💰 Estimated book generation cost: {total_book_cost} (budget: $2.00)")
|
||||||
|
|
||||||
utils.update_pricing(logic_name, logic_cost)
|
utils.update_pricing(logic_name, logic_cost)
|
||||||
utils.update_pricing(writer_name, writer_cost)
|
utils.update_pricing(writer_name, writer_cost)
|
||||||
|
|||||||
Reference in New Issue
Block a user