feat: Prefer Flash models in auto-selection criteria for cost reduction
This commit is contained in:
ai/setup.py — 11 lines changed (6 additions, 5 deletions)
@@ -34,7 +34,7 @@ def get_optimal_model(base_type="pro"):
|
|||||||
|
|
||||||
def get_default_models():
|
def get_default_models():
|
||||||
return {
|
return {
|
||||||
"logic": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro for complex reasoning and JSON adherence.", "estimated_cost": "$0.00/1M (Experimental)"},
|
"logic": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for cost-effective logic and JSON adherence.", "estimated_cost": "$0.10/1M"},
|
||||||
"writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"},
|
"writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"},
|
||||||
"artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"},
|
"artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"},
|
||||||
"ranking": []
|
"ranking": []
|
||||||
@@ -86,17 +86,18 @@ def select_best_models(force_refresh=False):
|
|||||||
- Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive).
|
- Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive).
|
||||||
|
|
||||||
CRITERIA:
|
CRITERIA:
|
||||||
- LOGIC: Needs complex reasoning, strict JSON adherence, plot consistency, and instruction following.
|
- LOGIC: Needs JSON adherence, plot consistency, and instruction following. COST IS THE PRIORITY — use Flash unless no Flash 2.x exists.
|
||||||
-> Prefer: Gemini 2.5 Pro > 2.0 Pro > 2.0 Flash > 1.5 Pro
|
-> Prefer: Gemini 2.5 Flash > 2.0 Flash > 1.5 Flash > 2.5 Pro > 2.0 Pro > 1.5 Pro
|
||||||
- WRITER: Needs creativity, prose quality, long-form text generation, and speed.
|
- WRITER: Needs creativity, prose quality, long-form text generation, and speed.
|
||||||
-> Prefer: Gemini 2.5 Flash/Pro > 2.0 Flash > 1.5 Flash (balance quality/cost)
|
-> Prefer: Gemini 2.5 Flash > 2.0 Flash > 1.5 Flash (balance quality/cost; avoid Pro)
|
||||||
- ARTIST: Needs rich visual description, prompt understanding for cover art design.
|
- ARTIST: Needs rich visual description, prompt understanding for cover art design.
|
||||||
-> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding)
|
-> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding)
|
||||||
|
|
||||||
CONSTRAINTS:
|
CONSTRAINTS:
|
||||||
|
- Strongly prefer Flash over Pro for all roles — Pro models are expensive and only used selectively at runtime for critical rewrites.
|
||||||
- Strongly prefer Gemini 2.x over 1.5 where available.
|
- Strongly prefer Gemini 2.x over 1.5 where available.
|
||||||
- Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine.
|
- Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine.
|
||||||
- 'thinking' models are too slow/expensive for Writer/Artist roles.
|
- 'thinking' models are too slow/expensive for any role.
|
||||||
- Provide a ranking of ALL available models from best to worst overall.
|
- Provide a ranking of ALL available models from best to worst overall.
|
||||||
|
|
||||||
OUTPUT_FORMAT (JSON only, no markdown):
|
OUTPUT_FORMAT (JSON only, no markdown):
|
||||||
|
|||||||
Reference in New Issue
Block a user