v1.2.0: Prefer Gemini 2.x models, improve cover generation and Docker health
Model selection (ai.py):
- get_optimal_model() now scores Gemini 2.5 > 2.0 > 1.5 when ranking candidates
- get_default_models() fallbacks updated to gemini-2.0-pro-exp (logic) and gemini-2.0-flash (writer/artist)
- AI selection prompt rewritten: includes Gemini 2.x pricing context, guidance to avoid 'thinking' models for writer/artist roles, and instructions to prefer 2.x over 1.5
- Added image_model_name and image_model_source globals for UI visibility
- init_models() now reads MODEL_IMAGE_HINT; when the hint is AUTO it tries imagen-3.0-generate-001 then imagen-3.0-fast-generate-001 on both Gemini API and Vertex AI paths (an explicit hint is tried on its own)

Cover generation (marketing.py):
- Fixed display bug: "Attempt X/5" now correctly reads "Attempt X/3"
- Added imagen-3.0-fast-generate-001 as intermediate fallback before legacy Imagen 2
- Quality threshold: images with score < 5 are only kept if nothing better exists
- Smarter prompt refinement on retry: deformity, blur, and text/lettering critique keywords each append targeted corrections to the art prompt (the text correction also forbids watermarks)
- Fixed missing sys import (sys.platform check for macOS was silently broken)

Config / Docker:
- config.py: added MODEL_IMAGE_HINT env var, bumped version to 1.2.0
- docker-compose.yml: added MODEL_IMAGE environment variable
- Dockerfile: added libpng-dev and libfreetype6-dev for better font/PNG rendering; added HEALTHCHECK so Portainer detects unhealthy containers

System status UI:
- system_status.html: added Image row showing active Imagen model and provider (Gemini API / Vertex AI)
- Added cache expiry countdown with colour-coded badges

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -3,11 +3,13 @@ FROM python:3.11-slim
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Install system dependencies required for Pillow (image processing)
|
||||
# Install system dependencies required for Pillow (image processing) and fonts
|
||||
RUN apt-get update && apt-get install -y \
|
||||
build-essential \
|
||||
libjpeg-dev \
|
||||
zlib1g-dev \
|
||||
libpng-dev \
|
||||
libfreetype6-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Copy requirements files
|
||||
@@ -24,4 +26,6 @@ COPY . .
|
||||
# Set Python path and run
|
||||
ENV PYTHONPATH=/app
|
||||
EXPOSE 5000
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
||||
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/login')" || exit 1
|
||||
CMD ["python", "-m", "modules.web_app"]
|
||||
@@ -14,6 +14,7 @@ GCP_LOCATION = get_clean_env("GCP_LOCATION", "us-central1")
|
||||
MODEL_LOGIC_HINT = get_clean_env("MODEL_LOGIC", "AUTO")
|
||||
MODEL_WRITER_HINT = get_clean_env("MODEL_WRITER", "AUTO")
|
||||
MODEL_ARTIST_HINT = get_clean_env("MODEL_ARTIST", "AUTO")
|
||||
MODEL_IMAGE_HINT = get_clean_env("MODEL_IMAGE", "AUTO")
|
||||
DEFAULT_BLUEPRINT = "book_def.json"
|
||||
|
||||
# --- SECURITY & ADMIN ---
|
||||
@@ -64,4 +65,4 @@ LENGTH_DEFINITIONS = {
|
||||
}
|
||||
|
||||
# --- SYSTEM ---
|
||||
VERSION = "1.1.0"
|
||||
VERSION = "1.2.0"
|
||||
@@ -37,3 +37,4 @@ services:
|
||||
- MODEL_LOGIC=${MODEL_LOGIC:-AUTO}
|
||||
- MODEL_WRITER=${MODEL_WRITER:-AUTO}
|
||||
- MODEL_ARTIST=${MODEL_ARTIST:-AUTO}
|
||||
- MODEL_IMAGE=${MODEL_IMAGE:-AUTO}
|
||||
@@ -31,6 +31,8 @@ model_image = None
|
||||
logic_model_name = "models/gemini-1.5-pro"
|
||||
writer_model_name = "models/gemini-1.5-flash"
|
||||
artist_model_name = "models/gemini-1.5-flash"
|
||||
image_model_name = None
|
||||
image_model_source = "None"
|
||||
|
||||
class ResilientModel:
|
||||
def __init__(self, name, safety_settings, role):
|
||||
@@ -75,10 +77,15 @@ def get_optimal_model(base_type="pro"):
|
||||
candidates = [m.name for m in models if base_type in m.name]
|
||||
if not candidates: return f"models/gemini-1.5-{base_type}"
|
||||
def score(n):
|
||||
# Prioritize stable models (higher quotas) over experimental/beta ones
|
||||
if "exp" in n or "beta" in n or "preview" in n: return 0
|
||||
if "latest" in n: return 50
|
||||
return 100
|
||||
# Prefer newer generations: 2.5 > 2.0 > 1.5
|
||||
gen_bonus = 0
|
||||
if "2.5" in n: gen_bonus = 300
|
||||
elif "2.0" in n: gen_bonus = 200
|
||||
elif "2." in n: gen_bonus = 150
|
||||
# Within a generation, prefer stable over experimental
|
||||
if "exp" in n or "beta" in n or "preview" in n: return gen_bonus + 0
|
||||
if "latest" in n: return gen_bonus + 50
|
||||
return gen_bonus + 100
|
||||
return sorted(candidates, key=score, reverse=True)[0]
|
||||
except Exception as e:
|
||||
utils.log("SYSTEM", f"⚠️ Error finding optimal model: {e}")
|
||||
@@ -86,9 +93,9 @@ def get_optimal_model(base_type="pro"):
|
||||
|
||||
def get_default_models():
|
||||
return {
|
||||
"logic": {"model": "models/gemini-1.5-pro", "reason": "Fallback: Default Pro model selected.", "estimated_cost": "$3.50/1M"},
|
||||
"writer": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"},
|
||||
"artist": {"model": "models/gemini-1.5-flash", "reason": "Fallback: Default Flash model selected.", "estimated_cost": "$0.075/1M"},
|
||||
"logic": {"model": "models/gemini-2.0-pro-exp", "reason": "Fallback: Gemini 2.0 Pro for complex reasoning and JSON adherence.", "estimated_cost": "$0.00/1M (Experimental)"},
|
||||
"writer": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for fast, high-quality creative writing.", "estimated_cost": "$0.10/1M"},
|
||||
"artist": {"model": "models/gemini-2.0-flash", "reason": "Fallback: Gemini 2.0 Flash for visual prompt design.", "estimated_cost": "$0.10/1M"},
|
||||
"ranking": []
|
||||
}
|
||||
|
||||
@@ -131,29 +138,37 @@ def select_best_models(force_refresh=False):
|
||||
model = genai.GenerativeModel(bootstrapper)
|
||||
prompt = f"""
|
||||
ROLE: AI Model Architect
|
||||
TASK: Select the optimal Gemini models for specific application roles.
|
||||
TASK: Select the optimal Gemini models for a book-writing application. Prefer newer Gemini 2.x models when available.
|
||||
|
||||
AVAILABLE_MODELS:
|
||||
{json.dumps(models)}
|
||||
|
||||
PRICING_CONTEXT (USD per 1M tokens):
|
||||
- Flash Models (e.g. gemini-1.5-flash): ~$0.075 Input / $0.30 Output. (Very Cheap)
|
||||
- Pro Models (e.g. gemini-1.5-pro): ~$3.50 Input / $10.50 Output. (Expensive)
|
||||
PRICING_CONTEXT (USD per 1M tokens, approximate):
|
||||
- Gemini 2.5 Pro/Flash: Best quality/speed; check current pricing.
|
||||
- Gemini 2.0 Flash: ~$0.10 Input / $0.40 Output. (Fast, cost-effective, excellent quality).
|
||||
- Gemini 2.0 Pro Exp: Free experimental tier with strong reasoning.
|
||||
- Gemini 1.5 Flash: ~$0.075 Input / $0.30 Output. (Legacy, still reliable).
|
||||
- Gemini 1.5 Pro: ~$1.25 Input / $5.00 Output. (Legacy, expensive).
|
||||
|
||||
CRITERIA:
|
||||
- LOGIC: Needs complex reasoning, JSON adherence, and instruction following. (Prefer Pro/1.5).
|
||||
- WRITER: Needs creativity, prose quality, and speed. (Prefer Flash/1.5 for speed, or Pro for quality).
|
||||
- ARTIST: Needs visual prompt understanding.
|
||||
- LOGIC: Needs complex reasoning, strict JSON adherence, plot consistency, and instruction following.
|
||||
-> Prefer: Gemini 2.5 Pro > 2.0 Pro > 2.0 Flash > 1.5 Pro
|
||||
- WRITER: Needs creativity, prose quality, long-form text generation, and speed.
|
||||
-> Prefer: Gemini 2.5 Flash/Pro > 2.0 Flash > 1.5 Flash (balance quality/cost)
|
||||
- ARTIST: Needs rich visual description, prompt understanding for cover art design.
|
||||
-> Prefer: Gemini 2.0 Flash > 1.5 Flash (speed and visual understanding)
|
||||
|
||||
CONSTRAINTS:
|
||||
- Avoid 'experimental' or 'preview' unless no stable version exists.
|
||||
- Prioritize 'latest' or stable versions.
|
||||
- Strongly prefer Gemini 2.x over 1.5 where available.
|
||||
- Avoid 'experimental' or 'preview' only if a stable 2.x version exists; otherwise experimental 2.x is fine.
|
||||
- 'thinking' models are too slow/expensive for Writer/Artist roles.
|
||||
- Provide a ranking of ALL available models from best to worst overall.
|
||||
|
||||
OUTPUT_FORMAT (JSON):
|
||||
OUTPUT_FORMAT (JSON only, no markdown):
|
||||
{{
|
||||
"logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }},
|
||||
"writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }},
|
||||
"artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX Input / $X.XX Output" }},
|
||||
"logic": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"writer": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"artist": {{ "model": "string", "reason": "string", "estimated_cost": "$X.XX/1M" }},
|
||||
"ranking": [ {{ "model": "string", "reason": "string", "estimated_cost": "string" }} ]
|
||||
}}
|
||||
"""
|
||||
@@ -195,7 +210,7 @@ def select_best_models(force_refresh=False):
|
||||
return fallback
|
||||
|
||||
def init_models(force=False):
|
||||
global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name
|
||||
global model_logic, model_writer, model_artist, model_image, logic_model_name, writer_model_name, artist_model_name, image_model_name, image_model_source
|
||||
if model_logic and not force: return
|
||||
genai.configure(api_key=config.API_KEY)
|
||||
|
||||
@@ -264,13 +279,28 @@ def init_models(force=False):
|
||||
model_writer.update(writer_name)
|
||||
model_artist.update(artist_name)
|
||||
|
||||
# Initialize Image Model (Default to None)
|
||||
# Initialize Image Model
|
||||
model_image = None
|
||||
if hasattr(genai, 'ImageGenerationModel'):
|
||||
try: model_image = genai.ImageGenerationModel("imagen-3.0-generate-001")
|
||||
except: pass
|
||||
image_model_name = None
|
||||
image_model_source = "None"
|
||||
|
||||
img_source = "Gemini API" if model_image else "None"
|
||||
hint = config.MODEL_IMAGE_HINT if hasattr(config, 'MODEL_IMAGE_HINT') else "AUTO"
|
||||
|
||||
if hasattr(genai, 'ImageGenerationModel'):
|
||||
# Candidate image models in preference order
|
||||
if hint and hint != "AUTO":
|
||||
candidates = [hint]
|
||||
else:
|
||||
candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"]
|
||||
for candidate in candidates:
|
||||
try:
|
||||
model_image = genai.ImageGenerationModel(candidate)
|
||||
image_model_name = candidate
|
||||
image_model_source = "Gemini API"
|
||||
utils.log("SYSTEM", f"✅ Image model: {candidate} (Gemini API)")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
# Auto-detect GCP Project from credentials if not set (Fix for Image Model)
|
||||
if HAS_VERTEX and not config.GCP_PROJECT and config.GOOGLE_CREDS and os.path.exists(config.GOOGLE_CREDS):
|
||||
@@ -326,9 +356,17 @@ def init_models(force=False):
|
||||
utils.log("SYSTEM", f"✅ Vertex AI initialized (Project: {config.GCP_PROJECT})")
|
||||
|
||||
# Override with Vertex Image Model if available
|
||||
vertex_candidates = ["imagen-3.0-generate-001", "imagen-3.0-fast-generate-001"]
|
||||
if hint and hint != "AUTO":
|
||||
vertex_candidates = [hint]
|
||||
for candidate in vertex_candidates:
|
||||
try:
|
||||
model_image = VertexImageModel.from_pretrained("imagen-3.0-generate-001")
|
||||
img_source = "Vertex AI"
|
||||
except: pass
|
||||
model_image = VertexImageModel.from_pretrained(candidate)
|
||||
image_model_name = candidate
|
||||
image_model_source = "Vertex AI"
|
||||
utils.log("SYSTEM", f"✅ Image model: {candidate} (Vertex AI)")
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
utils.log("SYSTEM", f"Image Generation Provider: {img_source}")
|
||||
utils.log("SYSTEM", f"Image Generation Provider: {image_model_source} ({image_model_name or 'unavailable'})")
|
||||
@@ -1,10 +1,10 @@
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import shutil
|
||||
import textwrap
|
||||
import subprocess
|
||||
import requests
|
||||
import google.generativeai as genai
|
||||
from . import utils
|
||||
import config
|
||||
from modules import ai
|
||||
@@ -212,9 +212,10 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
|
||||
best_img_score = 0
|
||||
best_img_path = None
|
||||
|
||||
MAX_IMG_ATTEMPTS = 3
|
||||
if regenerate_image:
|
||||
for i in range(1, 4):
|
||||
utils.log("MARKETING", f"Generating cover art (Attempt {i}/5)...")
|
||||
for i in range(1, MAX_IMG_ATTEMPTS + 1):
|
||||
utils.log("MARKETING", f"Generating cover art (Attempt {i}/{MAX_IMG_ATTEMPTS})...")
|
||||
try:
|
||||
if not ai.model_image: raise ImportError("No Image Generation Model available.")
|
||||
|
||||
@@ -222,12 +223,21 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
|
||||
try:
|
||||
result = ai.model_image.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar)
|
||||
except Exception as e:
|
||||
if "resource" in str(e).lower() and ai.HAS_VERTEX:
|
||||
utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 2...")
|
||||
err_lower = str(e).lower()
|
||||
# Try fast imagen variant before falling back to legacy
|
||||
if ai.HAS_VERTEX and ("resource" in err_lower or "quota" in err_lower):
|
||||
try:
|
||||
utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 3 Fast...")
|
||||
fb_model = ai.VertexImageModel.from_pretrained("imagen-3.0-fast-generate-001")
|
||||
result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar)
|
||||
status = "success_fast"
|
||||
except Exception:
|
||||
utils.log("MARKETING", "⚠️ Imagen 3 Fast failed. Trying Imagen 2...")
|
||||
fb_model = ai.VertexImageModel.from_pretrained("imagegeneration@006")
|
||||
result = fb_model.generate_images(prompt=art_prompt, number_of_images=1, aspect_ratio=ar)
|
||||
status = "success_fallback"
|
||||
else: raise e
|
||||
else:
|
||||
raise e
|
||||
|
||||
attempt_path = os.path.join(folder, f"cover_art_attempt_{i}.png")
|
||||
result.images[0].save(attempt_path)
|
||||
@@ -240,7 +250,6 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
|
||||
utils.log_image_attempt(folder, "cover", art_prompt, f"cover_art_{i}.png", status, score=score, critique=critique)
|
||||
|
||||
if interactive:
|
||||
# Open image for review
|
||||
try:
|
||||
if os.name == 'nt': os.startfile(attempt_path)
|
||||
elif sys.platform == 'darwin': subprocess.call(('open', attempt_path))
|
||||
@@ -254,16 +263,30 @@ def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
|
||||
utils.log("MARKETING", "User rejected cover. Retrying...")
|
||||
continue
|
||||
|
||||
if score > best_img_score:
|
||||
# Only keep as best if score meets minimum quality bar
|
||||
if score >= 5 and score > best_img_score:
|
||||
best_img_score = score
|
||||
best_img_path = attempt_path
|
||||
elif best_img_path is None and score > 0:
|
||||
# Accept even low-quality image if we have nothing else
|
||||
best_img_score = score
|
||||
best_img_path = attempt_path
|
||||
|
||||
if score == 10:
|
||||
utils.log("MARKETING", " -> Perfect image accepted.")
|
||||
if score >= 9:
|
||||
utils.log("MARKETING", " -> High quality image accepted.")
|
||||
break
|
||||
|
||||
if "scar" in critique.lower() or "deform" in critique.lower() or "blur" in critique.lower():
|
||||
art_prompt += " (Ensure high quality, clear skin, no scars, sharp focus)."
|
||||
# Refine prompt based on critique keywords
|
||||
prompt_additions = []
|
||||
critique_lower = critique.lower() if critique else ""
|
||||
if "scar" in critique_lower or "deform" in critique_lower:
|
||||
prompt_additions.append("perfect anatomy, no deformities")
|
||||
if "blur" in critique_lower or "blurry" in critique_lower:
|
||||
prompt_additions.append("sharp focus, highly detailed")
|
||||
if "text" in critique_lower or "letter" in critique_lower:
|
||||
prompt_additions.append("no text, no letters, no watermarks")
|
||||
if prompt_additions:
|
||||
art_prompt += f". ({', '.join(prompt_additions)})"
|
||||
|
||||
except Exception as e:
|
||||
utils.log("MARKETING", f"Image generation failed: {e}")
|
||||
|
||||
@@ -1303,7 +1303,8 @@ def system_status():
|
||||
models_info = cache_data.get('models', {})
|
||||
except: pass
|
||||
|
||||
return render_template('system_status.html', models=models_info, cache=cache_data, datetime=datetime)
|
||||
return render_template('system_status.html', models=models_info, cache=cache_data, datetime=datetime,
|
||||
image_model=ai.image_model_name, image_source=ai.image_model_source)
|
||||
|
||||
@app.route('/personas')
|
||||
@login_required
|
||||
|
||||
@@ -56,6 +56,22 @@
|
||||
</tr>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
<tr>
|
||||
<td class="fw-bold text-uppercase">Image</td>
|
||||
<td>
|
||||
{% if image_model %}
|
||||
<span class="badge bg-success">{{ image_model }}</span>
|
||||
{% else %}
|
||||
<span class="badge bg-danger">Unavailable</span>
|
||||
{% endif %}
|
||||
</td>
|
||||
<td>
|
||||
<span class="badge bg-light text-dark border">{{ image_source or 'None' }}</span>
|
||||
</td>
|
||||
<td class="small text-muted">
|
||||
{% if image_model %}Imagen model used for book cover generation.{% else %}No image generation model could be initialized. Check GCP credentials or Gemini API key.{% endif %}
|
||||
</td>
|
||||
</tr>
|
||||
{% else %}
|
||||
<tr>
|
||||
<td colspan="3" class="text-center py-4 text-muted">
|
||||
@@ -139,15 +155,32 @@
|
||||
<h5 class="mb-0"><i class="fas fa-clock me-2"></i>Cache Status</h5>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
<p class="mb-0">
|
||||
<p class="mb-1">
|
||||
<strong>Last Scan:</strong>
|
||||
{% if cache and cache.timestamp %}
|
||||
{{ datetime.fromtimestamp(cache.timestamp).strftime('%Y-%m-%d %H:%M:%S') }}
|
||||
{{ datetime.fromtimestamp(cache.timestamp).strftime('%Y-%m-%d %H:%M:%S') }} UTC
|
||||
{% else %}
|
||||
Never
|
||||
{% endif %}
|
||||
</p>
|
||||
<p class="text-muted small mb-0">Model selection is cached for 24 hours to save API calls.</p>
|
||||
<p class="mb-0">
|
||||
<strong>Next Refresh:</strong>
|
||||
{% if cache and cache.timestamp %}
|
||||
{% set expires = cache.timestamp + 86400 %}
|
||||
{% set now_ts = datetime.utcnow().timestamp() %}
|
||||
{% if expires > now_ts %}
|
||||
{% set remaining = (expires - now_ts) | int %}
|
||||
{% set h = remaining // 3600 %}{% set m = (remaining % 3600) // 60 %}
|
||||
in {{ h }}h {{ m }}m
|
||||
<span class="badge bg-success ms-1">Cache Valid</span>
|
||||
{% else %}
|
||||
<span class="badge bg-warning text-dark">Expired — click Refresh & Optimize</span>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
<span class="badge bg-warning text-dark">No cache — click Refresh & Optimize</span>
|
||||
{% endif %}
|
||||
</p>
|
||||
<p class="text-muted small mt-2 mb-0">Model selection is cached for 24 hours to save API calls.</p>
|
||||
</div>
|
||||
</div>
|
||||
{% endblock %}
|
||||
Reference in New Issue
Block a user