From 2a9a6058007bf65b40e9216f1ad99d35f309799e Mon Sep 17 00:00:00 2001 From: Mike Wichers Date: Fri, 20 Feb 2026 10:31:02 -0500 Subject: [PATCH] v1.2.0: Prefer Gemini 2.x models, improve cover generation and Docker health Model selection (ai.py): - get_optimal_model() now scores Gemini 2.5 > 2.0 > 1.5 when ranking candidates - get_default_models() fallbacks updated to gemini-2.0-pro-exp (logic) and gemini-2.0-flash (writer/artist) - AI selection prompt rewritten: includes Gemini 2.x pricing context, guidance to avoid 'thinking' models for writer/artist roles, and instructions to prefer 2.x over 1.5 - Added image_model_name and image_model_source globals for UI visibility - init_models() now reads MODEL_IMAGE_HINT; tries imagen-3.0-generate-001 then imagen-3.0-fast-generate-001 on both Gemini API and Vertex AI paths Cover generation (marketing.py): - Fixed display bug: "Attempt X/5" now correctly reads "Attempt X/3" - Added imagen-3.0-fast-generate-001 as intermediate fallback before legacy Imagen 2 - Quality threshold: images with score < 5 are only kept if nothing better exists - Smarter prompt refinement on retry: deformity, blur, and watermark critique keywords each append targeted corrections to the art prompt - Fixed missing sys import (sys.platform check for macOS was silently broken) Config / Docker: - config.py: added MODEL_IMAGE_HINT env var, bumped version to 1.2.0 - docker-compose.yml: added MODEL_IMAGE environment variable - Dockerfile: added libpng-dev and libfreetype6-dev for better font/PNG rendering; added HEALTHCHECK so Portainer detects unhealthy containers System status UI: - system_status.html: added Image row showing active Imagen model and provider (Gemini API / Vertex AI) - Added cache expiry countdown with colour-coded badges Co-Authored-By: Claude Sonnet 4.6 --- Dockerfile | 6 +- config.py | 5 +- docker-compose.yml | 3 +- modules/ai.py | 112 +++++++++++++++++++++++------------ modules/marketing.py | 71 ++++++++++++++-------- 
modules/web_app.py | 3 +- templates/system_status.html | 41 +++++++++++-- 7 files changed, 171 insertions(+), 70 deletions(-) diff --git a/Dockerfile b/Dockerfile index 82a1072..106d995 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,11 +3,13 @@ FROM python:3.11-slim # Set working directory WORKDIR /app -# Install system dependencies required for Pillow (image processing) +# Install system dependencies required for Pillow (image processing) and fonts RUN apt-get update && apt-get install -y \ build-essential \ libjpeg-dev \ zlib1g-dev \ + libpng-dev \ + libfreetype6-dev \ && rm -rf /var/lib/apt/lists/* # Copy requirements files @@ -24,4 +26,6 @@ COPY . . # Set Python path and run ENV PYTHONPATH=/app EXPOSE 5000 +HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/login')" || exit 1 CMD ["python", "-m", "modules.web_app"] \ No newline at end of file diff --git a/config.py b/config.py index 099a13e..fab85ad 100644 --- a/config.py +++ b/config.py @@ -11,9 +11,10 @@ def get_clean_env(key, default=None): API_KEY = get_clean_env("GEMINI_API_KEY") GCP_PROJECT = get_clean_env("GCP_PROJECT") GCP_LOCATION = get_clean_env("GCP_LOCATION", "us-central1") -MODEL_LOGIC_HINT = get_clean_env("MODEL_LOGIC", "AUTO") +MODEL_LOGIC_HINT = get_clean_env("MODEL_LOGIC", "AUTO") MODEL_WRITER_HINT = get_clean_env("MODEL_WRITER", "AUTO") MODEL_ARTIST_HINT = get_clean_env("MODEL_ARTIST", "AUTO") +MODEL_IMAGE_HINT = get_clean_env("MODEL_IMAGE", "AUTO") DEFAULT_BLUEPRINT = "book_def.json" # --- SECURITY & ADMIN --- @@ -64,4 +65,4 @@ LENGTH_DEFINITIONS = { } # --- SYSTEM --- -VERSION = "1.1.0" \ No newline at end of file +VERSION = "1.2.0" \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 9ffd978..94221a6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,4 +36,5 @@ services: - GCP_LOCATION=${GCP_LOCATION:-us-central1} - 
MODEL_LOGIC=${MODEL_LOGIC:-AUTO} - MODEL_WRITER=${MODEL_WRITER:-AUTO} - - MODEL_ARTIST=${MODEL_ARTIST:-AUTO} \ No newline at end of file + - MODEL_ARTIST=${MODEL_ARTIST:-AUTO} + - MODEL_IMAGE=${MODEL_IMAGE:-AUTO} \ No newline at end of file diff --git a/modules/ai.py b/modules/ai.py index 3dec6ba..39973fe 100644 --- a/modules/ai.py +++ b/modules/ai.py @@ -31,6 +31,8 @@ model_image = None logic_model_name = "models/gemini-1.5-pro" writer_model_name = "models/gemini-1.5-flash" artist_model_name = "models/gemini-1.5-flash" +image_model_name = None +image_model_source = "None" class ResilientModel: def __init__(self, name, safety_settings, role): @@ -75,10 +77,15 @@ def get_optimal_model(base_type="pro"): candidates = [m.name for m in models if base_type in m.name] if not candidates: return f"models/gemini-1.5-{base_type}" def score(n): - # Prioritize stable models (higher quotas) over experimental/beta ones - if "exp" in n or "beta" in n or "preview" in n: return 0 - if "latest" in n: return 50 - return 100 + # Prefer newer generations: 2.5 > 2.0 > 1.5 + gen_bonus = 0 + if "2.5" in n: gen_bonus = 300 + elif "2.0" in n: gen_bonus = 200 + elif "2." 
in n: gen_bonus = 150 + # Within a generation, prefer stable over experimental + if "exp" in n or "beta" in n or "preview" in n: return gen_bonus + 0 + if "latest" in n: return gen_bonus + 50 + return gen_bonus + 100 return sorted(candidates, key=score, reverse=True)[0] except Exception as e: utils.log("SYSTEM", f"⚠️ Error finding optimal model: {e}") @@ -86,9 +93,9 @@ def get_optimal_model(base_type="pro"): def get_default_models(): return { - "logic": {"model": "models/gemini-1.5-pro", "reason": "Fallback: Default Pro model selected.", "estimated_cost": "$3.50/1M"}, - "writer": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"}, - "artist": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"}, + "logic": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro for complex reasoning and JSON adherence.", "estimated_cost": "$0.00/1M (Experimental)"}, + "writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"}, + "artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"}, "ranking": [] } @@ -131,29 +138,37 @@ def select_best_models(force_refresh=False): model = genai.GenerativeModel(bootstrapper) prompt = f""" ROLE: AI Model Architect - TASK: Select the optimal Gemini models for specific application roles. - + TASK: Select the optimal Gemini models for a book-writing application. Prefer newer Gemini 2.x models when available. + AVAILABLE_MODELS: {json.dumps(models)} - - PRICING_CONTEXT (USD per 1M tokens): - - Flash Models (e.g. gemini-1.5-flash): ~$0.075 Input / $0.30 Output. (Very Cheap) - - Pro Models (e.g. gemini-1.5-pro): ~$3.50 Input / $10.50 Output. 
(Expensive) - + + PRICING_CONTEXT (USD per 1M tokens, approximate): + - Gemini 2.5 Pro/Flash: Best quality/speed; check current pricing. + - Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. (Fast, cost-effective, excellent quality). + - Gemini 2.0 Pro Exp: Free experimental tier with strong reasoning. + - Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. (Legacy, still reliable). + - Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive). + CRITERIA: - - LOGIC: Needs complex reasoning, JSON adherence, and instruction following. (Prefer Pro/1.5). - - WRITER: Needs creativity, prose quality, and speed. (Prefer Flash/1.5 for speed, or Pro for quality). - - ARTIST: Needs visual prompt understanding. - + - LOGIC: Needs complex reasoning, strict JSON adherence, plot consistency, and instruction following. + -> Prefer: Gemini 2.5 Pro > 2.0 Pro > 2.0 Flash > 1.5 Pro + - WRITER: Needs creativity, prose quality, long-form text generation, and speed. + -> Prefer: Gemini 2.5 Flash/Pro > 2.0 Flash > 1.5 Flash (balance quality/cost) + - ARTIST: Needs rich visual description, prompt understanding for cover art design. + -> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding) + CONSTRAINTS: - - Avoid 'experimental' or 'preview' unless no stable version exists. - - Prioritize 'latest' or stable versions. - - OUTPUT_FORMAT (JSON): + - Strongly prefer Gemini 2.x over 1.5 where available. + - Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine. + - 'thinking' models are too slow/expensive for Writer/Artist roles. + - Provide a ranking of ALL available models from best to worst overall. 
+ + OUTPUT_FORMAT (JSON only, no markdown): {{ - "logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }}, - "writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }}, - "artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }}, + "logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, + "writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, + "artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }}, "ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ] }} """ @@ -195,7 +210,7 @@ def select_best_models(force_refresh=False): return fallback def init_models(force=False): - global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name + global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name, image_model_name, image_model_source if model_logic and not force: return genai.configure(api_key=config.API_KEY) @@ -264,13 +279,28 @@ def init_models(force=False): model_writer.update(writer_name) model_artist.update(artist_name) - # Initialize Image Model (Default to None) + # Initialize Image Model model_image = None + image_model_name = None + image_model_source = "None" + + hint = config.MODEL_IMAGE_HINT if hasattr(config, 'MODEL_IMAGE_HINT') else "AUTO" + if hasattr(genai, 'ImageGenerationModel'): - try: model_image = genai.ImageGenerationModel("imagen-3.0-generate-001") - except: pass - - img_source = "Gemini API" if model_image else "None" + # Candidate image models in preference order + if hint and hint != "AUTO": + candidates = [hint] + else: + candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"] + for candidate in candidates: + try: + model_image = genai.ImageGenerationModel(candidate) + 
image_model_name = candidate + image_model_source = "Gemini API" + utils.log("SYSTEM", f"✅ Image model: {candidate} (Gemini API)") + break + except Exception: + continue # Auto-detect GCP Project from credentials if not set (Fix for Image Model) if HAS_VERTEX and not config.GCP_PROJECT and config.GOOGLE_CREDS and os.path.exists(config.GOOGLE_CREDS): @@ -326,9 +356,17 @@ def init_models(force=False): utils.log("SYSTEM", f"✅ Vertex AI initialized (Project: {config.GCP_PROJECT})") # Override with Vertex Image Model if available - try: - model_image = VertexImageModel.from_pretrained("imagen-3.0-generate-001") - img_source = "Vertex AI" - except: pass - - utils.log("SYSTEM", f"Image Generation Provider: {img_source}") \ No newline at end of file + vertex_candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"] + if hint and hint != "AUTO": + vertex_candidates = [hint] + for candidate in vertex_candidates: + try: + model_image = VertexImageModel.from_pretrained(candidate) + image_model_name = candidate + image_model_source = "Vertex AI" + utils.log("SYSTEM", f"✅ Image model: {candidate} (Vertex AI)") + break + except Exception: + continue + + utils.log("SYSTEM", f"Image Generation Provider: {image_model_source} ({image_model_name or 'unavailable'})") \ No newline at end of file diff --git a/modules/marketing.py b/modules/marketing.py index 13d76c4..0f3ae1d 100644 --- a/modules/marketing.py +++ b/modules/marketing.py @@ -1,10 +1,10 @@ import os +import sys import json import shutil import textwrap import subprocess import requests -import google.generativeai as genai from . 
import utils import config from modules import ai @@ -212,59 +212,82 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False): best_img_score = 0 best_img_path = None + MAX_IMG_ATTEMPTS = 3 if regenerate_image: - for i in range(1, 4): - utils.log("MARKETING", f"Generating cover art (Attempt {i}/5)...") + for i in range(1, MAX_IMG_ATTEMPTS + 1): + utils.log("MARKETING", f"Generating cover art (Attempt {i}/{MAX_IMG_ATTEMPTS})...") try: if not ai.model_image: raise ImportError("No Image Generation Model available.") - + status = "success" try: result = ai.model_image.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar) except Exception as e: - if "resource" in str(e).lower() and ai.HAS_VERTEX: - utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 2...") - fb_model = ai.VertexImageModel.from_pretrained("imagegeneration@006") - result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar) - status = "success_fallback" - else: raise e + err_lower = str(e).lower() + # Try fast imagen variant before falling back to legacy + if ai.HAS_VERTEX and ("resource" in err_lower or "quota" in err_lower): + try: + utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 3 Fast...") + fb_model = ai.VertexImageModel.from_pretrained("imagen-3.0-fast-generate-001") + result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar) + status = "success_fast" + except Exception: + utils.log("MARKETING", "⚠️ Imagen 3 Fast failed. 
Trying Imagen 2...") + fb_model = ai.VertexImageModel.from_pretrained("imagegeneration@006") + result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar) + status = "success_fallback" + else: + raise e attempt_path = os.path.join(folder, f"cover_art_attempt_{i}.png") result.images[0].save(attempt_path) utils.log_usage(folder, "imagen", image_count=1) - + score, critique = evaluate_image_quality(attempt_path, art_prompt, ai.model_writer, folder) if score is None: score = 0 - + utils.log("MARKETING", f" -> Image Score: {score}/10. Critique: {critique}") utils.log_image_attempt(folder, "cover", art_prompt, f"cover_art_{i}.png", status, score=score, critique=critique) - + if interactive: - # Open image for review try: if os.name == 'nt': os.startfile(attempt_path) elif sys.platform == 'darwin': subprocess.call(('open', attempt_path)) else: subprocess.call(('xdg-open', attempt_path)) except: pass - + if Confirm.ask(f"Accept cover attempt {i} (Score: {score})?", default=True): best_img_path = attempt_path break else: utils.log("MARKETING", "User rejected cover. Retrying...") continue - - if score > best_img_score: + + # Only keep as best if score meets minimum quality bar + if score >= 5 and score > best_img_score: best_img_score = score best_img_path = attempt_path - - if score == 10: - utils.log("MARKETING", " -> Perfect image accepted.") + elif best_img_path is None and score > 0: + # Accept even low-quality image if we have nothing else + best_img_score = score + best_img_path = attempt_path + + if score >= 9: + utils.log("MARKETING", " -> High quality image accepted.") break - - if "scar" in critique.lower() or "deform" in critique.lower() or "blur" in critique.lower(): - art_prompt += " (Ensure high quality, clear skin, no scars, sharp focus)." 
- + + # Refine prompt based on critique keywords + prompt_additions = [] + critique_lower = critique.lower() if critique else "" + if "scar" in critique_lower or "deform" in critique_lower: + prompt_additions.append("perfect anatomy, no deformities") + if "blur" in critique_lower or "blurry" in critique_lower: + prompt_additions.append("sharp focus, highly detailed") + if "text" in critique_lower or "letter" in critique_lower: + prompt_additions.append("no text, no letters, no watermarks") + if prompt_additions: + art_prompt += f". ({', '.join(prompt_additions)})" + except Exception as e: utils.log("MARKETING", f"Image generation failed: {e}") if "quota" in str(e).lower(): break diff --git a/modules/web_app.py b/modules/web_app.py index 94bc652..a160271 100644 --- a/modules/web_app.py +++ b/modules/web_app.py @@ -1303,7 +1303,8 @@ def system_status(): models_info = cache_data.get('models', {}) except: pass - return render_template('system_status.html', models=models_info, cache=cache_data, datetime=datetime) + return render_template('system_status.html', models=models_info, cache=cache_data, datetime=datetime, + image_model=ai.image_model_name, image_source=ai.image_model_source) @app.route('/personas') @login_required diff --git a/templates/system_status.html b/templates/system_status.html index e423854..2fe4eb3 100644 --- a/templates/system_status.html +++ b/templates/system_status.html @@ -56,6 +56,22 @@ {% endif %} {% endfor %} + + Image + + {% if image_model %} + {{ image_model }} + {% else %} + Unavailable + {% endif %} + + + {{ image_source or 'None' }} + + + {% if image_model %}Imagen model used for book cover generation.{% else %}No image generation model could be initialized. Check GCP credentials or Gemini API key.{% endif %} + + {% else %} @@ -139,15 +155,32 @@
Cache Status
-

- Last Scan: +

+ Last Scan: {% if cache and cache.timestamp %} - {{ datetime.fromtimestamp(cache.timestamp).strftime('%Y-%m-%d %H:%M:%S') }} + {# Convert the epoch in UTC so the value matches the "UTC" label regardless of the server's local timezone. #}{{ datetime.utcfromtimestamp(cache.timestamp).strftime('%Y-%m-%d %H:%M:%S') }} UTC {% else %} Never {% endif %}

-

Model selection is cached for 24 hours to save API calls.

+

+ Next Refresh: + {% if cache and cache.timestamp %} + {% set expires = cache.timestamp + 86400 %} + {# now().timestamp() yields the true epoch; a naive utcnow() would be interpreted as local time and skew the countdown on non-UTC hosts. #}{% set now_ts = datetime.now().timestamp() %} + {% if expires > now_ts %} + {% set remaining = (expires - now_ts) | int %} + {% set h = remaining // 3600 %}{% set m = (remaining % 3600) // 60 %} + in {{ h }}h {{ m }}m + Cache Valid + {% else %} + Expired — click Refresh & Optimize + {% endif %} + {% else %} + No cache — click Refresh & Optimize + {% endif %} +

+

Model selection is cached for 24 hours to save API calls.

{% endblock %} \ No newline at end of file