feat: Cost-aware Pro model selection — free Pro beats Flash, paid Pro loses to Flash
This commit is contained in:
44
ai/setup.py
44
ai/setup.py
@@ -34,9 +34,10 @@ def get_optimal_model(base_type="pro"):
|
||||
|
||||
def get_default_models():
    """Return the hard-coded fallback model selection.

    The result maps each role ("logic", "writer", "artist", "pro_rewrite")
    to a dict with "model", "reason" and "estimated_cost" keys, and also
    carries an empty "ranking" list.

    A new dict is built on every call, so the caller may mutate the result
    without affecting later calls.
    """
    # "logic" is listed exactly once. A dict literal that repeats a key
    # silently keeps only the last value, so a stale duplicate entry would be
    # dead code that misleads readers about which model is the fallback.
    return {
        "logic": {
            "model": "models/gemini-2.0-pro-exp",
            "reason": "Fallback: Gemini 2.0 Pro Exp (free) for cost-effective logic and JSON adherence.",
            "estimated_cost": "Free",
        },
        "writer": {
            "model": "models/gemini-2.0-flash",
            "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.",
            "estimated_cost": "$0.10/1M",
        },
        "artist": {
            "model": "models/gemini-2.0-flash",
            "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.",
            "estimated_cost": "$0.10/1M",
        },
        "pro_rewrite": {
            "model": "models/gemini-2.0-pro-exp",
            "reason": "Fallback: Gemini 2.0 Pro Exp (free) for critical chapter rewrites.",
            "estimated_cost": "Free",
        },
        "ranking": [],
    }
|
||||
|
||||
@@ -78,25 +79,28 @@ def select_best_models(force_refresh=False):
|
||||
AVAILABLE_MODELS:
|
||||
{json.dumps(compatible)}
|
||||
|
||||
PRICING_CONTEXT (USD per 1M tokens, approximate):
|
||||
- Gemini 2.5 Pro/Flash: Best quality/speed; check current pricing.
|
||||
- Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. (Fast, cost-effective, excellent quality).
|
||||
- Gemini 2.0 Pro Exp: Free experimental tier with strong reasoning.
|
||||
- Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. (Legacy, still reliable).
|
||||
- Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive).
|
||||
PRICING_CONTEXT (USD per 1M tokens, approximate — TIER determines preference):
|
||||
- FREE TIER: Models with 'exp', 'beta', or 'preview' in name are typically free experimental tiers.
|
||||
e.g. gemini-2.0-pro-exp = FREE. Use these whenever they exist.
|
||||
- Gemini 2.5 Flash: ~$0.075 Input / $0.30 Output. Fast and very capable.
|
||||
- Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. Cost-effective, excellent quality.
|
||||
- Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. Legacy, still reliable.
|
||||
- Gemini 2.5 Pro (stable/latest): ~$1.25+ Input / $5.00+ Output. EXPENSIVE — avoid unless free/exp.
|
||||
- Gemini 1.5 Pro (stable): ~$1.25 Input / $5.00 Output. EXPENSIVE — avoid.
|
||||
|
||||
CRITERIA:
|
||||
- LOGIC: Needs JSON adherence, plot consistency, and instruction following. COST IS THE PRIORITY — use Flash unless no Flash 2.x exists.
|
||||
-> Prefer: Gemini 2.5 Flash > 2.0 Flash > 1.5 Flash > 2.5 Pro > 2.0 Pro > 1.5 Pro
|
||||
- WRITER: Needs creativity, prose quality, long-form text generation, and speed.
|
||||
-> Prefer: Gemini 2.5 Flash > 2.0 Flash > 1.5 Flash (balance quality/cost; avoid Pro)
|
||||
- ARTIST: Needs rich visual description, prompt understanding for cover art design.
|
||||
-> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding)
|
||||
CRITERIA (cost is the primary constraint):
|
||||
- LOGIC: Needs JSON adherence, plot consistency, instruction following.
|
||||
-> Prefer by EFFECTIVE COST: Free/Exp Pro (e.g. 2.0-pro-exp) > Flash 2.5 > Flash 2.0 > Flash 1.5 > paid Pro (AVOID)
|
||||
-> Rule: A free Pro beats Flash. A paid Pro loses to any Flash.
|
||||
- WRITER: Needs creativity, prose quality, long-form text generation. Flash is sufficient for prose.
|
||||
-> Prefer: Flash 2.5 > Flash 2.0 > Flash 1.5. Do NOT use Pro — Flash quality is adequate for fiction writing.
|
||||
- ARTIST: Needs visual description and prompt quality for cover art design.
|
||||
-> Prefer: Flash 2.0 > Flash 1.5 (speed and visual understanding)
|
||||
|
||||
CONSTRAINTS:
|
||||
- Strongly prefer Flash over Pro for all roles — Pro models are expensive and only used selectively at runtime for critical rewrites.
|
||||
- A free/experimental Pro model is ALWAYS preferred over Flash for the Logic role.
|
||||
- Flash is ALWAYS preferred over a paid Pro model for ALL roles.
|
||||
- Strongly prefer Gemini 2.x over 1.5 where available.
|
||||
- Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine.
|
||||
- 'thinking' models are too slow/expensive for any role.
|
||||
- Provide a ranking of ALL available models from best to worst overall.
|
||||
|
||||
@@ -105,8 +109,12 @@ def select_best_models(force_refresh=False):
|
||||
"logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"pro_rewrite": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ]
|
||||
}}
|
||||
|
||||
NOTE: "pro_rewrite" is the best available Pro model for rare, critical chapter rewrites.
|
||||
Prefer free/experimental Pro (e.g. gemini-2.0-pro-exp) over paid Pro. If no Pro exists, use best Flash.
|
||||
"""
|
||||
|
||||
try:
|
||||
@@ -187,6 +195,7 @@ def init_models(force=False):
|
||||
logic_name, logic_cost = get_model_details(selected_models['logic'])
|
||||
writer_name, writer_cost = get_model_details(selected_models['writer'])
|
||||
artist_name, artist_cost = get_model_details(selected_models['artist'])
|
||||
pro_name, pro_cost = get_model_details(selected_models.get('pro_rewrite', {'model': 'models/gemini-2.0-pro-exp', 'estimated_cost': 'Free'}))
|
||||
|
||||
logic_name = logic_name if config.MODEL_LOGIC_HINT == "AUTO" else config.MODEL_LOGIC_HINT
|
||||
writer_name = writer_name if config.MODEL_WRITER_HINT == "AUTO" else config.MODEL_WRITER_HINT
|
||||
@@ -195,8 +204,9 @@ def init_models(force=False):
|
||||
models.logic_model_name = logic_name
|
||||
models.writer_model_name = writer_name
|
||||
models.artist_model_name = artist_name
|
||||
models.pro_model_name = pro_name
|
||||
|
||||
utils.log("SYSTEM", f"Models: Logic={logic_name} ({logic_cost}) | Writer={writer_name} ({writer_cost}) | Artist={artist_name}")
|
||||
utils.log("SYSTEM", f"Models: Logic={logic_name} ({logic_cost}) | Writer={writer_name} ({writer_cost}) | Artist={artist_name} | Pro-Rewrite={pro_name} ({pro_cost})")
|
||||
|
||||
utils.update_pricing(logic_name, logic_cost)
|
||||
utils.update_pricing(writer_name, writer_cost)
|
||||
|
||||
Reference in New Issue
Block a user