v1.2.0: Prefer Gemini 2.x models, improve cover generation and Docker health

Model selection (ai.py):
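- get_optimal_model() now adds a generation bonus when ranking candidates (2.5 > 2.0 > other 2.x > everything else, e.g. 1.5); within a generation, stable models still rank above 'latest', which rank above exp/beta/preview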
- get_default_models() fallbacks updated to gemini-2.0-pro-exp (logic) and gemini-2.0-flash (writer/artist)
- AI selection prompt rewritten: includes Gemini 2.x pricing context, guidance to avoid 'thinking' models for writer/artist roles, and instructions to prefer 2.x over 1.5
- Added image_model_name and image_model_source globals for UI visibility
- init_models() now reads MODEL_IMAGE_HINT: when it is AUTO, tries imagen-3.0-generate-001 then imagen-3.0-fast-generate-001 on both Gemini API and Vertex AI paths; when a specific model is hinted, only that model is tried

Cover generation (marketing.py):
- Fixed display bug: "Attempt X/5" now correctly reads "Attempt X/3"
- Added imagen-3.0-fast-generate-001 as intermediate fallback before legacy Imagen 2
- Quality threshold: an image with score < 5 is kept only when no other image has been kept yet; the retry loop now stops early at score >= 9 (previously only at 10)
- Smarter prompt refinement on retry: critique keywords for scars/deformities, blur, and stray text/lettering each append a targeted correction (anatomy, sharp focus, no text or watermarks) to the art prompt
- Fixed missing sys import (sys.platform check for macOS was silently broken)

Config / Docker:
- config.py: added MODEL_IMAGE_HINT setting (read from the MODEL_IMAGE env var, default AUTO), bumped version to 1.2.0
- docker-compose.yml: added MODEL_IMAGE environment variable
- Dockerfile: added libpng-dev and libfreetype6-dev for better font/PNG rendering; added HEALTHCHECK so Portainer detects unhealthy containers

System status UI:
- system_status.html: added Image row showing active Imagen model and provider (Gemini API / Vertex AI)
- Added cache expiry countdown with colour-coded badges

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-20 10:31:02 -05:00
parent 5e0def99c1
commit 2a9a605800
7 changed files with 171 additions and 70 deletions

View File

@@ -3,11 +3,13 @@ FROM python:3.11-slim
# Set working directory # Set working directory
WORKDIR /app WORKDIR /app
# Install system dependencies required for Pillow (image processing) # Install system dependencies required for Pillow (image processing) and fonts
RUN apt-get update && apt-get install -y \ RUN apt-get update && apt-get install -y \
build-essential \ build-essential \
libjpeg-dev \ libjpeg-dev \
zlib1g-dev \ zlib1g-dev \
libpng-dev \
libfreetype6-dev \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Copy requirements files # Copy requirements files
@@ -24,4 +26,6 @@ COPY . .
# Set Python path and run # Set Python path and run
ENV PYTHONPATH=/app ENV PYTHONPATH=/app
EXPOSE 5000 EXPOSE 5000
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/login')" || exit 1
CMD ["python", "-m", "modules.web_app"] CMD ["python", "-m", "modules.web_app"]

View File

@@ -14,6 +14,7 @@ GCP_LOCATION = get_clean_env("GCP_LOCATION", "us-central1")
MODEL_LOGIC_HINT = get_clean_env("MODEL_LOGIC", "AUTO") MODEL_LOGIC_HINT = get_clean_env("MODEL_LOGIC", "AUTO")
MODEL_WRITER_HINT = get_clean_env("MODEL_WRITER", "AUTO") MODEL_WRITER_HINT = get_clean_env("MODEL_WRITER", "AUTO")
MODEL_ARTIST_HINT = get_clean_env("MODEL_ARTIST", "AUTO") MODEL_ARTIST_HINT = get_clean_env("MODEL_ARTIST", "AUTO")
MODEL_IMAGE_HINT = get_clean_env("MODEL_IMAGE", "AUTO")
DEFAULT_BLUEPRINT = "book_def.json" DEFAULT_BLUEPRINT = "book_def.json"
# --- SECURITY & ADMIN --- # --- SECURITY & ADMIN ---
@@ -64,4 +65,4 @@ LENGTH_DEFINITIONS = {
} }
# --- SYSTEM --- # --- SYSTEM ---
VERSION = "1.1.0" VERSION = "1.2.0"

View File

@@ -37,3 +37,4 @@ services:
- MODEL_LOGIC=${MODEL_LOGIC:-AUTO} - MODEL_LOGIC=${MODEL_LOGIC:-AUTO}
- MODEL_WRITER=${MODEL_WRITER:-AUTO} - MODEL_WRITER=${MODEL_WRITER:-AUTO}
- MODEL_ARTIST=${MODEL_ARTIST:-AUTO} - MODEL_ARTIST=${MODEL_ARTIST:-AUTO}
- MODEL_IMAGE=${MODEL_IMAGE:-AUTO}

View File

@@ -31,6 +31,8 @@ model_image = None
logic_model_name = "models/gemini-1.5-pro" logic_model_name = "models/gemini-1.5-pro"
writer_model_name = "models/gemini-1.5-flash" writer_model_name = "models/gemini-1.5-flash"
artist_model_name = "models/gemini-1.5-flash" artist_model_name = "models/gemini-1.5-flash"
image_model_name = None
image_model_source = "None"
class ResilientModel: class ResilientModel:
def __init__(self, name, safety_settings, role): def __init__(self, name, safety_settings, role):
@@ -75,10 +77,15 @@ def get_optimal_model(base_type="pro"):
candidates = [m.name for m in models if base_type in m.name] candidates = [m.name for m in models if base_type in m.name]
if not candidates: return f"models/gemini-1.5-{base_type}" if not candidates: return f"models/gemini-1.5-{base_type}"
def score(n): def score(n):
# Prioritize stable models (higher quotas) over experimental/beta ones # Prefer newer generations: 2.5 > 2.0 > 1.5
if "exp" in n or "beta" in n or "preview" in n: return 0 gen_bonus = 0
if "latest" in n: return 50 if "2.5" in n: gen_bonus = 300
return 100 elif "2.0" in n: gen_bonus = 200
elif "2." in n: gen_bonus = 150
# Within a generation, prefer stable over experimental
if "exp" in n or "beta" in n or "preview" in n: return gen_bonus + 0
if "latest" in n: return gen_bonus + 50
return gen_bonus + 100
return sorted(candidates, key=score, reverse=True)[0] return sorted(candidates, key=score, reverse=True)[0]
except Exception as e: except Exception as e:
utils.log("SYSTEM", f"⚠️ Error finding optimal model: {e}") utils.log("SYSTEM", f"⚠️ Error finding optimal model: {e}")
@@ -86,9 +93,9 @@ def get_optimal_model(base_type="pro"):
def get_default_models(): def get_default_models():
return { return {
"logic": {"model": "models/gemini-1.5-pro", "reason": "Fallback: Default Pro model selected.", "estimated_cost": "$3.50/1M"}, "logic": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro for complex reasoning and JSON adherence.", "estimated_cost": "$0.00/1M (Experimental)"},
"writer": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"}, "writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"},
"artist": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"}, "artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"},
"ranking": [] "ranking": []
} }
@@ -131,29 +138,37 @@ def select_best_models(force_refresh=False):
model = genai.GenerativeModel(bootstrapper) model = genai.GenerativeModel(bootstrapper)
prompt = f""" prompt = f"""
ROLE: AI Model Architect ROLE: AI Model Architect
TASK: Select the optimal Gemini models for specific application roles. TASK: Select the optimal Gemini models for a book-writing application. Prefer newer Gemini 2.x models when available.
AVAILABLE_MODELS: AVAILABLE_MODELS:
{json.dumps(models)} {json.dumps(models)}
PRICING_CONTEXT (USD per 1M tokens): PRICING_CONTEXT (USD per 1M tokens, approximate):
- Flash Models (e.g. gemini-1.5-flash): ~$0.075 Input / $0.30 Output. (Very Cheap) - Gemini 2.5 Pro/Flash: Best quality/speed; check current pricing.
- Pro Models (e.g. gemini-1.5-pro): ~$3.50 Input / $10.50 Output. (Expensive) - Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. (Fast, cost-effective, excellent quality).
- Gemini 2.0 Pro Exp: Free experimental tier with strong reasoning.
- Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. (Legacy, still reliable).
- Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive).
CRITERIA: CRITERIA:
- LOGIC: Needs complex reasoning, JSON adherence, and instruction following. (Prefer Pro/1.5). - LOGIC: Needs complex reasoning, strict JSON adherence, plot consistency, and instruction following.
- WRITER: Needs creativity, prose quality, and speed. (Prefer Flash/1.5 for speed, or Pro for quality). -> Prefer: Gemini 2.5 Pro > 2.0 Pro > 2.0 Flash > 1.5 Pro
- ARTIST: Needs visual prompt understanding. - WRITER: Needs creativity, prose quality, long-form text generation, and speed.
-> Prefer: Gemini 2.5 Flash/Pro > 2.0 Flash > 1.5 Flash (balance quality/cost)
- ARTIST: Needs rich visual description, prompt understanding for cover art design.
-> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding)
CONSTRAINTS: CONSTRAINTS:
- Avoid 'experimental' or 'preview' unless no stable version exists. - Strongly prefer Gemini 2.x over 1.5 where available.
- Prioritize 'latest' or stable versions. - Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine.
- 'thinking' models are too slow/expensive for Writer/Artist roles.
- Provide a ranking of ALL available models from best to worst overall.
OUTPUT_FORMAT (JSON): OUTPUT_FORMAT (JSON only, no markdown):
{{ {{
"logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }}, "logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
"writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }}, "writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
"artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }}, "artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
"ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ] "ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ]
}} }}
""" """
@@ -195,7 +210,7 @@ def select_best_models(force_refresh=False):
return fallback return fallback
def init_models(force=False): def init_models(force=False):
global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name, image_model_name, image_model_source
if model_logic and not force: return if model_logic and not force: return
genai.configure(api_key=config.API_KEY) genai.configure(api_key=config.API_KEY)
@@ -264,13 +279,28 @@ def init_models(force=False):
model_writer.update(writer_name) model_writer.update(writer_name)
model_artist.update(artist_name) model_artist.update(artist_name)
# Initialize Image Model (Default to None) # Initialize Image Model
model_image = None model_image = None
if hasattr(genai, 'ImageGenerationModel'): image_model_name = None
try: model_image = genai.ImageGenerationModel("imagen-3.0-generate-001") image_model_source = "None"
except: pass
img_source = "Gemini API" if model_image else "None" hint = config.MODEL_IMAGE_HINT if hasattr(config, 'MODEL_IMAGE_HINT') else "AUTO"
if hasattr(genai, 'ImageGenerationModel'):
# Candidate image models in preference order
if hint and hint != "AUTO":
candidates = [hint]
else:
candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"]
for candidate in candidates:
try:
model_image = genai.ImageGenerationModel(candidate)
image_model_name = candidate
image_model_source = "Gemini API"
utils.log("SYSTEM", f"✅ Image model: {candidate} (Gemini API)")
break
except Exception:
continue
# Auto-detect GCP Project from credentials if not set (Fix for Image Model) # Auto-detect GCP Project from credentials if not set (Fix for Image Model)
if HAS_VERTEX and not config.GCP_PROJECT and config.GOOGLE_CREDS and os.path.exists(config.GOOGLE_CREDS): if HAS_VERTEX and not config.GCP_PROJECT and config.GOOGLE_CREDS and os.path.exists(config.GOOGLE_CREDS):
@@ -326,9 +356,17 @@ def init_models(force=False):
utils.log("SYSTEM", f"✅ Vertex AI initialized (Project: {config.GCP_PROJECT})") utils.log("SYSTEM", f"✅ Vertex AI initialized (Project: {config.GCP_PROJECT})")
# Override with Vertex Image Model if available # Override with Vertex Image Model if available
vertex_candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"]
if hint and hint != "AUTO":
vertex_candidates = [hint]
for candidate in vertex_candidates:
try: try:
model_image = VertexImageModel.from_pretrained("imagen-3.0-generate-001") model_image = VertexImageModel.from_pretrained(candidate)
img_source = "Vertex AI" image_model_name = candidate
except: pass image_model_source = "Vertex AI"
utils.log("SYSTEM", f"✅ Image model: {candidate} (Vertex AI)")
break
except Exception:
continue
utils.log("SYSTEM", f"Image Generation Provider: {img_source}") utils.log("SYSTEM", f"Image Generation Provider: {image_model_source} ({image_model_name or 'unavailable'})")

View File

@@ -1,10 +1,10 @@
import os import os
import sys
import json import json
import shutil import shutil
import textwrap import textwrap
import subprocess import subprocess
import requests import requests
import google.generativeai as genai
from . import utils from . import utils
import config import config
from modules import ai from modules import ai
@@ -212,9 +212,10 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
best_img_score = 0 best_img_score = 0
best_img_path = None best_img_path = None
MAX_IMG_ATTEMPTS = 3
if regenerate_image: if regenerate_image:
for i in range(1, 4): for i in range(1, MAX_IMG_ATTEMPTS + 1):
utils.log("MARKETING", f"Generating cover art (Attempt {i}/5)...") utils.log("MARKETING", f"Generating cover art (Attempt {i}/{MAX_IMG_ATTEMPTS})...")
try: try:
if not ai.model_image: raise ImportError("No Image Generation Model available.") if not ai.model_image: raise ImportError("No Image Generation Model available.")
@@ -222,12 +223,21 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
try: try:
result = ai.model_image.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar) result = ai.model_image.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar)
except Exception as e: except Exception as e:
if "resource" in str(e).lower() and ai.HAS_VERTEX: err_lower = str(e).lower()
utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 2...") # Try fast imagen variant before falling back to legacy
if ai.HAS_VERTEX and ("resource" in err_lower or "quota" in err_lower):
try:
utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 3 Fast...")
fb_model = ai.VertexImageModel.from_pretrained("imagen-3.0-fast-generate-001")
result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar)
status = "success_fast"
except Exception:
utils.log("MARKETING", "⚠️ Imagen 3 Fast failed. Trying Imagen 2...")
fb_model = ai.VertexImageModel.from_pretrained("imagegeneration@006") fb_model = ai.VertexImageModel.from_pretrained("imagegeneration@006")
result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar) result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar)
status = "success_fallback" status = "success_fallback"
else: raise e else:
raise e
attempt_path = os.path.join(folder, f"cover_art_attempt_{i}.png") attempt_path = os.path.join(folder, f"cover_art_attempt_{i}.png")
result.images[0].save(attempt_path) result.images[0].save(attempt_path)
@@ -240,7 +250,6 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
utils.log_image_attempt(folder, "cover", art_prompt, f"cover_art_{i}.png", status, score=score, critique=critique) utils.log_image_attempt(folder, "cover", art_prompt, f"cover_art_{i}.png", status, score=score, critique=critique)
if interactive: if interactive:
# Open image for review
try: try:
if os.name == 'nt': os.startfile(attempt_path) if os.name == 'nt': os.startfile(attempt_path)
elif sys.platform == 'darwin': subprocess.call(('open', attempt_path)) elif sys.platform == 'darwin': subprocess.call(('open', attempt_path))
@@ -254,16 +263,30 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
utils.log("MARKETING", "User rejected cover. Retrying...") utils.log("MARKETING", "User rejected cover. Retrying...")
continue continue
if score > best_img_score: # Only keep as best if score meets minimum quality bar
if score >= 5 and score > best_img_score:
best_img_score = score
best_img_path = attempt_path
elif best_img_path is None and score > 0:
# Accept even low-quality image if we have nothing else
best_img_score = score best_img_score = score
best_img_path = attempt_path best_img_path = attempt_path
if score == 10: if score >= 9:
utils.log("MARKETING", " -> Perfect image accepted.") utils.log("MARKETING", " -> High quality image accepted.")
break break
if "scar" in critique.lower() or "deform" in critique.lower() or "blur" in critique.lower(): # Refine prompt based on critique keywords
art_prompt += " (Ensure high quality, clear skin, no scars, sharp focus)." prompt_additions = []
critique_lower = critique.lower() if critique else ""
if "scar" in critique_lower or "deform" in critique_lower:
prompt_additions.append("perfect anatomy, no deformities")
if "blur" in critique_lower or "blurry" in critique_lower:
prompt_additions.append("sharp focus, highly detailed")
if "text" in critique_lower or "letter" in critique_lower:
prompt_additions.append("no text, no letters, no watermarks")
if prompt_additions:
art_prompt += f". ({', '.join(prompt_additions)})"
except Exception as e: except Exception as e:
utils.log("MARKETING", f"Image generation failed: {e}") utils.log("MARKETING", f"Image generation failed: {e}")

View File

@@ -1303,7 +1303,8 @@ def system_status():
models_info = cache_data.get('models', {}) models_info = cache_data.get('models', {})
except: pass except: pass
return render_template('system_status.html', models=models_info, cache=cache_data, datetime=datetime) return render_template('system_status.html', models=models_info, cache=cache_data, datetime=datetime,
image_model=ai.image_model_name, image_source=ai.image_model_source)
@app.route('/personas') @app.route('/personas')
@login_required @login_required

View File

@@ -56,6 +56,22 @@
</tr> </tr>
{% endif %} {% endif %}
{% endfor %} {% endfor %}
<tr>
<td class="fw-bold text-uppercase">Image</td>
<td>
{% if image_model %}
<span class="badge bg-success">{{ image_model }}</span>
{% else %}
<span class="badge bg-danger">Unavailable</span>
{% endif %}
</td>
<td>
<span class="badge bg-light text-dark border">{{ image_source or 'None' }}</span>
</td>
<td class="small text-muted">
{% if image_model %}Imagen model used for book cover generation.{% else %}No image generation model could be initialized. Check GCP credentials or Gemini API key.{% endif %}
</td>
</tr>
{% else %} {% else %}
<tr> <tr>
<td colspan="3" class="text-center py-4 text-muted"> <td colspan="3" class="text-center py-4 text-muted">
@@ -139,15 +155,32 @@
<h5 class="mb-0"><i class="fas fa-clock me-2"></i>Cache Status</h5> <h5 class="mb-0"><i class="fas fa-clock me-2"></i>Cache Status</h5>
</div> </div>
<div class="card-body"> <div class="card-body">
<p class="mb-0"> <p class="mb-1">
<strong>Last Scan:</strong> <strong>Last Scan:</strong>
{% if cache and cache.timestamp %} {% if cache and cache.timestamp %}
{{ datetime.fromtimestamp(cache.timestamp).strftime('%Y-%m-%d %H:%M:%S') }} {{ datetime.fromtimestamp(cache.timestamp).strftime('%Y-%m-%d %H:%M:%S') }} UTC
{% else %} {% else %}
Never Never
{% endif %} {% endif %}
</p> </p>
<p class="text-muted small mb-0">Model selection is cached for 24 hours to save API calls.</p> <p class="mb-0">
<strong>Next Refresh:</strong>
{% if cache and cache.timestamp %}
{% set expires = cache.timestamp + 86400 %}
{% set now_ts = datetime.utcnow().timestamp() %}
{% if expires > now_ts %}
{% set remaining = (expires - now_ts) | int %}
{% set h = remaining // 3600 %}{% set m = (remaining % 3600) // 60 %}
in {{ h }}h {{ m }}m
<span class="badge bg-success ms-1">Cache Valid</span>
{% else %}
<span class="badge bg-warning text-dark">Expired — click Refresh &amp; Optimize</span>
{% endif %}
{% else %}
<span class="badge bg-warning text-dark">No cache — click Refresh &amp; Optimize</span>
{% endif %}
</p>
<p class="text-muted small mt-2 mb-0">Model selection is cached for 24 hours to save API calls.</p>
</div> </div>
</div> </div>
{% endblock %} {% endblock %}