v1.2.0: Prefer Gemini 2.x models, improve cover generation and Docker health

Model selection (ai.py):
- get_optimal_model() now scores Gemini 2.5 > 2.0 > 1.5 when ranking candidates
- get_default_models() fallbacks updated to gemini-2.0-pro-exp (logic) and gemini-2.0-flash (writer/artist)
- AI selection prompt rewritten: includes Gemini 2.x pricing context, guidance to avoid 'thinking' models for writer/artist roles, and instructions to prefer 2.x over 1.5
- Added image_model_name and image_model_source globals for UI visibility
- init_models() now reads MODEL_IMAGE_HINT; tries imagen-3.0-generate-001 then imagen-3.0-fast-generate-001 on both Gemini API and Vertex AI paths
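The new generation-aware ranking can be exercised in isolation. The sketch below mirrors the `score()` logic from `get_optimal_model()` in the diff: a generation bonus (2.5 > 2.0 > other 2.x > 1.5) is added to a stability bonus (stable > "latest" > experimental/beta/preview), so an experimental 2.x model still outranks a stable 1.5 one:

```python
# Standalone sketch of the generation-aware scoring in get_optimal_model().
def score(n: str) -> int:
    # Generation bonus: 2.5 > 2.0 > other 2.x > 1.5 (no bonus)
    gen_bonus = 0
    if "2.5" in n:
        gen_bonus = 300
    elif "2.0" in n:
        gen_bonus = 200
    elif "2." in n:
        gen_bonus = 150
    # Stability bonus within a generation
    if "exp" in n or "beta" in n or "preview" in n:
        return gen_bonus + 0
    if "latest" in n:
        return gen_bonus + 50
    return gen_bonus + 100

candidates = [
    "models/gemini-1.5-pro",
    "models/gemini-2.0-pro-exp",
    "models/gemini-2.5-pro",
]
best = sorted(candidates, key=score, reverse=True)[0]
# best is "models/gemini-2.5-pro" (score 400); note the experimental 2.0
# model (200) still beats the stable 1.5 model (100).
```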

Cover generation (marketing.py):
- Fixed display bug: "Attempt X/5" now correctly reads "Attempt X/3"
- Added imagen-3.0-fast-generate-001 as intermediate fallback before legacy Imagen 2
- Quality threshold: images scoring below 5 are kept only when no better candidate exists
- Smarter prompt refinement on retry: deformity, blur, and watermark critique keywords each append targeted corrections to the art prompt
- Fixed missing sys import (sys.platform check for macOS was silently broken)
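The retry refinement can be sketched roughly as below. Since marketing.py is not included in this diff view, the keyword table, correction strings, and function name here are illustrative assumptions, not the actual implementation:

```python
# Illustrative sketch of critique-driven prompt refinement. The real code
# lives in marketing.py (not shown in this diff); keywords and correction
# strings here are hypothetical examples of the described mechanism.
CRITIQUE_FIXES = {
    "deform": "anatomically correct, well-proportioned figures",
    "blur": "sharp focus, crisp details, high resolution",
    "watermark": "no text, no watermarks, no logos",
}

def refine_prompt(art_prompt: str, critique: str) -> str:
    """Append a targeted correction for each critique keyword found."""
    critique_lower = critique.lower()
    for keyword, correction in CRITIQUE_FIXES.items():
        if keyword in critique_lower:
            art_prompt += f", {correction}"
    return art_prompt
```

For example, a critique mentioning both deformity and blur appends two corrections to the art prompt before the next attempt.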

Config / Docker:
- config.py: added MODEL_IMAGE_HINT env var, bumped version to 1.2.0
- docker-compose.yml: added MODEL_IMAGE environment variable
- Dockerfile: added libpng-dev and libfreetype6-dev for better font/PNG rendering; added HEALTHCHECK so Portainer detects unhealthy containers
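A HEALTHCHECK of the kind described might look like the fragment below. The endpoint, port, and intervals are assumptions (the app's actual Dockerfile lines are not shown in this diff); Portainer marks the container unhealthy once the check fails `--retries` times:

```dockerfile
# Hypothetical sketch -- the real endpoint, port, and timings are not shown here.
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
  CMD curl -fsS http://localhost:5000/ || exit 1
```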

System status UI:
- system_status.html: added Image row showing active Imagen model and provider (Gemini API / Vertex AI)
- Added cache expiry countdown with colour-coded badges

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-20 10:31:02 -05:00
parent 5e0def99c1
commit 2a9a605800
7 changed files with 171 additions and 70 deletions


@@ -31,6 +31,8 @@ model_image = None
 logic_model_name = "models/gemini-1.5-pro"
 writer_model_name = "models/gemini-1.5-flash"
 artist_model_name = "models/gemini-1.5-flash"
+image_model_name = None
+image_model_source = "None"
 
 class ResilientModel:
     def __init__(self, name, safety_settings, role):
@@ -75,10 +77,15 @@ def get_optimal_model(base_type="pro"):
         candidates = [m.name for m in models if base_type in m.name]
         if not candidates: return f"models/gemini-1.5-{base_type}"
         def score(n):
-            # Prioritize stable models (higher quotas) over experimental/beta ones
-            if "exp" in n or "beta" in n or "preview" in n: return 0
-            if "latest" in n: return 50
-            return 100
+            # Prefer newer generations: 2.5 > 2.0 > 1.5
+            gen_bonus = 0
+            if "2.5" in n: gen_bonus = 300
+            elif "2.0" in n: gen_bonus = 200
+            elif "2." in n: gen_bonus = 150
+            # Within a generation, prefer stable over experimental
+            if "exp" in n or "beta" in n or "preview" in n: return gen_bonus + 0
+            if "latest" in n: return gen_bonus + 50
+            return gen_bonus + 100
         return sorted(candidates, key=score, reverse=True)[0]
     except Exception as e:
         utils.log("SYSTEM", f"⚠️ Error finding optimal model: {e}")
@@ -86,9 +93,9 @@ def get_optimal_model(base_type="pro"):
 
 def get_default_models():
     return {
-        "logic": {"model": "models/gemini-1.5-pro", "reason": "Fallback: Default Pro model selected.", "estimated_cost": "$3.50/1M"},
-        "writer": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"},
-        "artist": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"},
+        "logic": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro for complex reasoning and JSON adherence.", "estimated_cost": "$0.00/1M (Experimental)"},
+        "writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"},
+        "artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"},
         "ranking": []
     }
@@ -131,29 +138,37 @@ def select_best_models(force_refresh=False):
         model = genai.GenerativeModel(bootstrapper)
         prompt = f"""
 ROLE: AI Model Architect
-TASK: Select the optimal Gemini models for specific application roles.
+TASK: Select the optimal Gemini models for a book-writing application. Prefer newer Gemini 2.x models when available.
 
 AVAILABLE_MODELS:
 {json.dumps(models)}
 
-PRICING_CONTEXT (USD per 1M tokens):
-- Flash Models (e.g. gemini-1.5-flash): ~$0.075 Input / $0.30 Output. (Very Cheap)
-- Pro Models (e.g. gemini-1.5-pro): ~$3.50 Input / $10.50 Output. (Expensive)
+PRICING_CONTEXT (USD per 1M tokens, approximate):
+- Gemini 2.5 Pro/Flash: Best quality/speed; check current pricing.
+- Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. (Fast, cost-effective, excellent quality).
+- Gemini 2.0 Pro Exp: Free experimental tier with strong reasoning.
+- Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. (Legacy, still reliable).
+- Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive).
 
 CRITERIA:
-- LOGIC: Needs complex reasoning, JSON adherence, and instruction following. (Prefer Pro/1.5).
-- WRITER: Needs creativity, prose quality, and speed. (Prefer Flash/1.5 for speed, or Pro for quality).
-- ARTIST: Needs visual prompt understanding.
+- LOGIC: Needs complex reasoning, strict JSON adherence, plot consistency, and instruction following.
+  -> Prefer: Gemini 2.5 Pro > 2.0 Pro > 2.0 Flash > 1.5 Pro
+- WRITER: Needs creativity, prose quality, long-form text generation, and speed.
+  -> Prefer: Gemini 2.5 Flash/Pro > 2.0 Flash > 1.5 Flash (balance quality/cost)
+- ARTIST: Needs rich visual description, prompt understanding for cover art design.
+  -> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding)
 
 CONSTRAINTS:
-- Avoid 'experimental' or 'preview' unless no stable version exists.
-- Prioritize 'latest' or stable versions.
+- Strongly prefer Gemini 2.x over 1.5 where available.
+- Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine.
+- 'thinking' models are too slow/expensive for Writer/Artist roles.
+- Provide a ranking of ALL available models from best to worst overall.
 
-OUTPUT_FORMAT (JSON):
+OUTPUT_FORMAT (JSON only, no markdown):
 {{
-  "logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }},
-  "writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }},
-  "artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }},
+  "logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
+  "writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
+  "artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
   "ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ]
 }}
 """
@@ -195,7 +210,7 @@ def select_best_models(force_refresh=False):
     return fallback
 
 def init_models(force=False):
-    global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name
+    global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name, image_model_name, image_model_source
    if model_logic and not force: return
 
     genai.configure(api_key=config.API_KEY)
@@ -264,13 +279,28 @@ def init_models(force=False):
         model_writer.update(writer_name)
         model_artist.update(artist_name)
 
-    # Initialize Image Model (Default to None)
+    # Initialize Image Model
     model_image = None
+    image_model_name = None
+    image_model_source = "None"
+    hint = config.MODEL_IMAGE_HINT if hasattr(config, 'MODEL_IMAGE_HINT') else "AUTO"
-    if hasattr(genai, 'ImageGenerationModel'):
-        try: model_image = genai.ImageGenerationModel("imagen-3.0-generate-001")
-        except: pass
-    img_source = "Gemini API" if model_image else "None"
+
+    # Candidate image models in preference order
+    if hint and hint != "AUTO":
+        candidates = [hint]
+    else:
+        candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"]
+    for candidate in candidates:
+        try:
+            model_image = genai.ImageGenerationModel(candidate)
+            image_model_name = candidate
+            image_model_source = "Gemini API"
+            utils.log("SYSTEM", f"✅ Image model: {candidate} (Gemini API)")
+            break
+        except Exception:
+            continue
 
     # Auto-detect GCP Project from credentials if not set (Fix for Image Model)
     if HAS_VERTEX and not config.GCP_PROJECT and config.GOOGLE_CREDS and os.path.exists(config.GOOGLE_CREDS):
# Auto-detect GCP Project from credentials if not set (Fix for Image Model)
if HAS_VERTEX and not config.GCP_PROJECT and config.GOOGLE_CREDS and os.path.exists(config.GOOGLE_CREDS):
@@ -326,9 +356,17 @@ def init_models(force=False):
             utils.log("SYSTEM", f"✅ Vertex AI initialized (Project: {config.GCP_PROJECT})")
             # Override with Vertex Image Model if available
-            try:
-                model_image = VertexImageModel.from_pretrained("imagen-3.0-generate-001")
-                img_source = "Vertex AI"
-            except: pass
+            vertex_candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"]
+            if hint and hint != "AUTO":
+                vertex_candidates = [hint]
+            for candidate in vertex_candidates:
+                try:
+                    model_image = VertexImageModel.from_pretrained(candidate)
+                    image_model_name = candidate
+                    image_model_source = "Vertex AI"
+                    utils.log("SYSTEM", f"✅ Image model: {candidate} (Vertex AI)")
+                    break
+                except Exception:
+                    continue
 
-    utils.log("SYSTEM", f"Image Generation Provider: {img_source}")
+    utils.log("SYSTEM", f"Image Generation Provider: {image_model_source} ({image_model_name or 'unavailable'})")