From c3724a6761b2c9904b649dbc2073c72f6065d324 Mon Sep 17 00:00:00 2001 From: Mike Wichers Date: Sun, 22 Feb 2026 10:38:57 -0500 Subject: [PATCH] =?UTF-8?q?feat:=20Cost-aware=20Pro=20model=20selection=20?= =?UTF-8?q?=E2=80=94=20free=20Pro=20beats=20Flash,=20paid=20Pro=20loses=20?= =?UTF-8?q?to=20Flash?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ai/models.py | 1 + ai/setup.py | 44 +++++++++++++++++++++++++++----------------- story/writer.py | 7 ++++--- 3 files changed, 32 insertions(+), 20 deletions(-) diff --git a/ai/models.py b/ai/models.py index 0333b23..1ef58c6 100644 --- a/ai/models.py +++ b/ai/models.py @@ -30,6 +30,7 @@ model_image = None logic_model_name = "models/gemini-1.5-flash" writer_model_name = "models/gemini-1.5-flash" artist_model_name = "models/gemini-1.5-flash" +pro_model_name = "models/gemini-2.0-pro-exp" # Best available Pro for critical rewrites (prefer free/exp) image_model_name = None image_model_source = "None" diff --git a/ai/setup.py b/ai/setup.py index e30a0f6..04ac4ec 100644 --- a/ai/setup.py +++ b/ai/setup.py @@ -34,9 +34,10 @@ def get_optimal_model(base_type="pro"): def get_default_models(): return { - "logic": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for cost-effective logic and JSON adherence.", "estimated_cost": "$0.10/1M"}, + "logic": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro Exp (free) for cost-effective logic and JSON adherence.", "estimated_cost": "Free"}, "writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"}, "artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"}, + "pro_rewrite": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro Exp (free) for critical chapter rewrites.", "estimated_cost": "Free"}, "ranking": [] } @@ -78,25 +79,28 @@ def select_best_models(force_refresh=False): AVAILABLE_MODELS: {json.dumps(compatible)} - PRICING_CONTEXT (USD per 1M tokens, approximate): - - Gemini 2.5 Pro/Flash: Best quality/speed; check current pricing. - - Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. (Fast, cost-effective, excellent quality). - - Gemini 2.0 Pro Exp: Free experimental tier with strong reasoning. - - Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. (Legacy, still reliable). - - Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive). + PRICING_CONTEXT (USD per 1M tokens, approximate — TIER determines preference): + - FREE TIER: Models with 'exp', 'beta', or 'preview' in name are typically free experimental tiers. + e.g. gemini-2.0-pro-exp = FREE. Use these whenever they exist. + - Gemini 2.5 Flash: ~$0.075 Input / $0.30 Output. Fast and very capable. + - Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. Cost-effective, excellent quality. + - Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. Legacy, still reliable. + - Gemini 2.5 Pro (stable/latest): ~$1.25+ Input / $5.00+ Output. EXPENSIVE — avoid unless free/exp. + - Gemini 1.5 Pro (stable): ~$1.25 Input / $5.00 Output. EXPENSIVE — avoid. - CRITERIA: - - LOGIC: Needs JSON adherence, plot consistency, and instruction following. COST IS THE PRIORITY — use Flash unless no Flash 2.x exists. - -> Prefer: Gemini 2.5 Flash > 2.0 Flash > 1.5 Flash > 2.5 Pro > 2.0 Pro > 1.5 Pro - - WRITER: Needs creativity, prose quality, long-form text generation, and speed. - -> Prefer: Gemini 2.5 Flash > 2.0 Flash > 1.5 Flash (balance quality/cost; avoid Pro) - - ARTIST: Needs rich visual description, prompt understanding for cover art design. - -> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding) + CRITERIA (cost is the primary constraint): + - LOGIC: Needs JSON adherence, plot consistency, instruction following. + -> Prefer by EFFECTIVE COST: Free/Exp Pro (e.g. 2.0-pro-exp) > Flash 2.5 > Flash 2.0 > Flash 1.5 > paid Pro (AVOID) + -> Rule: A free Pro beats Flash. A paid Pro loses to any Flash. + - WRITER: Needs creativity, prose quality, long-form text generation. Flash is sufficient for prose. + -> Prefer: Flash 2.5 > Flash 2.0 > Flash 1.5. Do NOT use Pro — Flash quality is adequate for fiction writing. + - ARTIST: Needs visual description and prompt quality for cover art design. + -> Prefer: Flash 2.0 > Flash 1.5 (speed and visual understanding) CONSTRAINTS: - - Strongly prefer Flash over Pro for all roles — Pro models are expensive and only used selectively at runtime for critical rewrites. + - A free/experimental Pro model is ALWAYS preferred over Flash for the Logic role. + - Flash is ALWAYS preferred over a paid Pro model for ALL roles. - Strongly prefer Gemini 2.x over 1.5 where available. - - Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine. - 'thinking' models are too slow/expensive for any role. - Provide a ranking of ALL available models from best to worst overall. @@ -105,8 +109,12 @@ def select_best_models(force_refresh=False): "logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, "writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, "artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, + "pro_rewrite": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, "ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ] }} + + NOTE: "pro_rewrite" is the best available Pro model for rare, critical chapter rewrites. + Prefer free/experimental Pro (e.g. gemini-2.0-pro-exp) over paid Pro. If no Pro exists, use best Flash. """ try: @@ -187,6 +195,7 @@ def init_models(force=False): logic_name, logic_cost = get_model_details(selected_models['logic']) writer_name, writer_cost = get_model_details(selected_models['writer']) artist_name, artist_cost = get_model_details(selected_models['artist']) + pro_name, pro_cost = get_model_details(selected_models.get('pro_rewrite', {'model': 'models/gemini-2.0-pro-exp', 'estimated_cost': 'Free'})) logic_name = logic_name if config.MODEL_LOGIC_HINT == "AUTO" else config.MODEL_LOGIC_HINT writer_name = writer_name if config.MODEL_WRITER_HINT == "AUTO" else config.MODEL_WRITER_HINT @@ -195,8 +204,9 @@ def init_models(force=False): models.logic_model_name = logic_name models.writer_model_name = writer_name models.artist_model_name = artist_name + models.pro_model_name = pro_name - utils.log("SYSTEM", f"Models: Logic={logic_name} ({logic_cost}) | Writer={writer_name} ({writer_cost}) | Artist={artist_name}") + utils.log("SYSTEM", f"Models: Logic={logic_name} ({logic_cost}) | Writer={writer_name} ({writer_cost}) | Artist={artist_name} | Pro-Rewrite={pro_name} ({pro_cost})") utils.update_pricing(logic_name, logic_cost) utils.update_pricing(writer_name, writer_cost) diff --git a/story/writer.py b/story/writer.py index 170edd6..261230d 100644 --- a/story/writer.py +++ b/story/writer.py @@ -378,14 +378,15 @@ def write_chapter(chap, bp, folder, prev_sum, tracking=None, prev_content=None, """ try: - ai_models.model_logic.update("models/gemini-1.5-pro") + _pro = getattr(ai_models, 'pro_model_name', 'models/gemini-2.0-pro-exp') + ai_models.model_logic.update(_pro) resp_rewrite = ai_models.model_logic.generate_content(full_rewrite_prompt) utils.log_usage(folder, ai_models.model_logic.name, resp_rewrite.usage_metadata) current_text = resp_rewrite.text - ai_models.model_logic.update("models/gemini-1.5-flash") + ai_models.model_logic.update(ai_models.logic_model_name) continue except Exception as e: - ai_models.model_logic.update("models/gemini-1.5-flash") + ai_models.model_logic.update(ai_models.logic_model_name) utils.log("WRITER", f"Full rewrite failed: {e}. Falling back to refinement.") utils.log("WRITER", f" -> Refining Ch {chap['chapter_number']} based on feedback...")