Files
bookapp/marketing/cover.py
Mike Wichers 3a42d1a339 feat: Rebuild cover pipeline with full evaluate→critique→refine→retry quality gates
Major changes to marketing/cover.py:
- Split evaluate_image_quality() into two purpose-built functions:
  * evaluate_cover_art(): 5-rubric scoring (visual impact, genre fit, composition,
    quality, clean image) with auto-fail for visible text (score capped at 4) and
    deductions for deformed anatomy
  * evaluate_cover_layout(): 5-rubric scoring (legibility, typography, placement,
    professional polish, genre signal) with auto-fail for illegible title (capped at 4)
- Added validate_art_prompt(): pre-validates the Imagen prompt before generation —
  strips accidental text instructions, ensures focal point + rule-of-thirds + genre fit
- Added _build_visual_context(): extracts protagonist/antagonist descriptions and key
  themes from tracking data into structured visual context for the art director prompt
- Score thresholds raised to match chapter pipeline: ART_PASSING=7, ART_AUTO_ACCEPT=8,
  LAYOUT_PASSING=7 (was: art>=5 or >0, layout breaks only at ==10)
- Critique-driven art prompt refinement between attempts: full LLM rewrite of the
  Imagen prompt using the evaluator's actionable feedback (not just keyword appending)
- Layout loop now breaks early at score>=7 (was: only at ==10, so never)
- Design prompt strengthened with explicit character/visual context and NO TEXT clause

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-22 22:24:27 -05:00

555 lines
26 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import os
import sys
import json
import shutil
import textwrap
import subprocess
from core import utils
from ai import models as ai_models
from marketing.fonts import download_font
try:
from PIL import Image, ImageDraw, ImageFont, ImageStat
HAS_PIL = True
except ImportError:
HAS_PIL = False
# Score gates (mirrors chapter writing pipeline thresholds).
# All scores are on the evaluators' 1-10 rubric scale.
ART_SCORE_AUTO_ACCEPT = 8  # Art loop stops retrying at or above this score
ART_SCORE_PASSING = 7  # Art at or above this score may replace the current best candidate
LAYOUT_SCORE_PASSING = 7  # Layout loop accepts the layout and stops retrying at or above this score
# ---------------------------------------------------------------------------
# Evaluation helpers
# ---------------------------------------------------------------------------
def evaluate_cover_art(image_path, genre, title, model, folder=None):
    """Score generated cover art against a professional book-cover rubric.

    The image at *image_path* is sent to *model* together with a rubric
    prompt; the model is asked to reply with JSON holding ``score``,
    ``critique`` and ``actionable`` fields.

    Auto-fail conditions requested from the model (not enforced in code):
      - Any visible text/watermarks: score capped at 4.
      - Blurry or deformed anatomy: deduct 2 points.

    Args:
        image_path: Path of the image file to evaluate.
        genre: Genre name interpolated into the rubric.
        title: Book title interpolated into the rubric.
        model: Object exposing ``generate_content``; its ``name`` attribute
            (default ``"logic"``) is used for usage logging.
        folder: When truthy, usage metadata is logged against this folder.

    Returns:
        ``(score, critique)``. ``score`` is an int clamped to 0-10, or
        ``None`` when PIL is unavailable, the call fails, or the model's
        score is not numeric. ``critique`` is the model's critique with the
        actionable fix appended as `` FIX: ...``, or the error text.
    """
    if not HAS_PIL:
        return None, "PIL not installed"
    try:
        prompt = f"""
ROLE: Professional Book Cover Art Critic
TASK: Score this AI-generated cover art for a {genre} novel titled '{title}'.
SCORING RUBRIC (1-10):
1. VISUAL IMPACT: Is the image immediately arresting? Does it demand attention on a shelf?
2. GENRE FIT: Does the visual style, mood, and colour palette unmistakably signal {genre}?
3. COMPOSITION: Is there a clear focal point? Are the top or bottom thirds usable for title/author text overlay?
4. TECHNICAL QUALITY: Sharp, detailed, free of deformities, blurring, or AI artefacts?
5. CLEAN IMAGE: Absolutely NO text, letters, numbers, watermarks, logos, or UI elements?
SCORING SCALE:
- 9-10: Masterclass cover art, ready for a major publisher
- 7-8: Professional quality, genre-appropriate, minor flaws only
- 5-6: Usable but generic or has one significant flaw
- 1-4: Unusable — major artefacts, wrong genre, deformed figures, or visible text
AUTO-FAIL RULES (apply before scoring):
- If ANY text, letters, watermarks or UI elements are visible → score CANNOT exceed 4. State this explicitly.
- If figures have deformed anatomy or blurring → deduct 2 from your final score.
OUTPUT_FORMAT (JSON): {{"score": int, "critique": "Specific issues citing what to fix in the next attempt.", "actionable": "One concrete change to the image prompt that would improve the next attempt."}}
"""
        # The context manager releases the file handle once the model has
        # consumed the image (Image.open keeps the file open otherwise).
        with Image.open(image_path) as img:
            response = model.generate_content([prompt, img])
        model_name = getattr(model, 'name', "logic")
        if folder:
            utils.log_usage(folder, model_name, response.usage_metadata)
        data = json.loads(utils.clean_json(response.text))
        # Some models wrap the object in a single-element array.
        if isinstance(data, list):
            data = data[0] if data else {}
        # Normalise the score: callers compare it numerically, so a string
        # such as "7" or a float such as 7.5 must become an int.
        try:
            score = max(0, min(10, int(float(data.get('score')))))
        except (TypeError, ValueError, OverflowError):
            score = None
        # `or ''` guards against an explicit JSON null critique.
        critique = str(data.get('critique') or '')
        actionable = data.get('actionable')
        if actionable:
            critique += f" FIX: {actionable}"
        return score, critique
    except Exception as e:
        # Evaluation is best-effort: report the failure as the critique.
        return None, str(e)
def evaluate_cover_layout(image_path, title, author, genre, font_name, model, folder=None):
    """Score the finished cover (art + text overlay) as a professional book cover.

    The image at *image_path* is sent to *model* with a layout rubric; the
    model is asked to reply with JSON holding ``score``, ``critique`` and
    ``actionable`` fields. The prompt asks the model to cap the score at 4
    when the title is illegible; that cap is not enforced in code.

    Args:
        image_path: Path of the rendered cover image.
        title: Book title interpolated into the rubric.
        author: Author name interpolated into the rubric.
        genre: Genre name interpolated into the rubric.
        font_name: Font name mentioned in the typography criterion.
        model: Object exposing ``generate_content``; its ``name`` attribute
            (default ``"logic"``) is used for usage logging.
        folder: When truthy, usage metadata is logged against this folder.

    Returns:
        ``(score, critique)``. ``score`` is an int clamped to 0-10, or
        ``None`` when PIL is unavailable, the call fails, or the model's
        score is not numeric. ``critique`` is the model's critique with the
        actionable fix appended as `` FIX: ...``, or the error text.
    """
    if not HAS_PIL:
        return None, "PIL not installed"
    try:
        prompt = f"""
ROLE: Graphic Design Critic
TASK: Score this finished book cover for '{title}' by {author} ({genre}).
SCORING RUBRIC (1-10):
1. LEGIBILITY: Is the title instantly readable? High contrast against the background?
2. TYPOGRAPHY: Does the font '{font_name}' suit the {genre} genre? Is sizing proportional?
3. PLACEMENT: Is the title placed where it doesn't obscure the focal point? Is the author name readable?
4. PROFESSIONAL POLISH: Does this look like a published, commercially-viable cover?
5. GENRE SIGNAL: At a glance, does the whole cover (art + text) correctly signal {genre}?
SCORING SCALE:
- 9-10: Indistinguishable from a professional published cover
- 7-8: Strong cover, minor refinement would help
- 5-6: Passable but text placement or contrast needs work
- 1-4: Unusable — unreadable text, clashing colours, or amateurish layout
AUTO-FAIL: If the title text is illegible (low contrast, obscured, or missing) → score CANNOT exceed 4.
OUTPUT_FORMAT (JSON): {{"score": int, "critique": "Specific layout issues.", "actionable": "One change to position, colour, or font size that would fix the worst problem."}}
"""
        # The context manager releases the file handle once the model has
        # consumed the image (Image.open keeps the file open otherwise).
        with Image.open(image_path) as img:
            response = model.generate_content([prompt, img])
        model_name = getattr(model, 'name', "logic")
        if folder:
            utils.log_usage(folder, model_name, response.usage_metadata)
        data = json.loads(utils.clean_json(response.text))
        # Some models wrap the object in a single-element array.
        if isinstance(data, list):
            data = data[0] if data else {}
        # Normalise the score: callers compare it numerically, so a string
        # such as "7" or a float such as 7.5 must become an int.
        try:
            score = max(0, min(10, int(float(data.get('score')))))
        except (TypeError, ValueError, OverflowError):
            score = None
        # `or ''` guards against an explicit JSON null critique.
        critique = str(data.get('critique') or '')
        actionable = data.get('actionable')
        if actionable:
            critique += f" FIX: {actionable}"
        return score, critique
    except Exception as e:
        # Evaluation is best-effort: report the failure as the critique.
        return None, str(e)
# ---------------------------------------------------------------------------
# Art prompt pre-validation
# ---------------------------------------------------------------------------
def validate_art_prompt(art_prompt, meta, model, folder=None):
    """Pre-validate and improve the image generation prompt before calling Imagen.

    Asks *model* to review *art_prompt* for accidental text instructions, a
    vague focal point, missing composition guidance and genre mismatch, and
    to return a rewritten prompt as JSON (``{"improved_prompt": ...}``).

    Args:
        art_prompt: The image prompt to review.
        meta: Book metadata dict; ``genre`` (default ``'Fiction'``) and
            ``title`` (default ``'Untitled'``) are read from it.
        model: Object exposing ``generate_content``; its ``name`` attribute
            (default ``"logic"``) is used for usage logging.
        folder: When truthy, usage metadata is logged against this folder.

    Returns:
        The improved prompt when the model returns one longer than 50
        characters; otherwise the original *art_prompt* (including on any
        failure, which is logged).
    """
    genre = meta.get('genre', 'Fiction')
    title = meta.get('title', 'Untitled')
    check_prompt = f"""
ROLE: Art Director
TASK: Review and improve this image generation prompt for a {genre} book cover titled '{title}'.
CURRENT_PROMPT:
{art_prompt}
CHECK FOR AND FIX:
1. Any instruction to render text, letters, or the title? → Remove it (text is overlaid separately).
2. Is there a specific, memorable FOCAL POINT described? → Add one if missing.
3. Does the colour palette and style match {genre} conventions? → Correct if off.
4. Is RULE OF THIRDS composition mentioned (space at top/bottom for title overlay)? → Add if missing.
5. Does it end with "No text, no letters, no watermarks"? → Ensure this is present.
Return the improved prompt under 200 words.
OUTPUT_FORMAT (JSON): {{"improved_prompt": "..."}}
"""
    try:
        resp = model.generate_content(check_prompt)
        if folder:
            # getattr keeps this consistent with the evaluators: a model
            # without a `name` must not discard an otherwise good response.
            utils.log_usage(folder, getattr(model, 'name', "logic"), resp.usage_metadata)
        data = json.loads(utils.clean_json(resp.text))
        # Some models wrap the object in a single-element array.
        if isinstance(data, list):
            data = data[0] if data else {}
        # `or ''` guards against an explicit JSON null.
        improved = str(data.get('improved_prompt') or '').strip()
        # Very short replies are treated as a degenerate rewrite.
        if len(improved) > 50:
            utils.log("MARKETING", " -> Art prompt validated and improved.")
            return improved
        utils.log("MARKETING", " -> Art prompt validation returned no usable rewrite. Using original.")
    except Exception as e:
        utils.log("MARKETING", f" -> Art prompt validation failed: {e}. Using original.")
    return art_prompt
# ---------------------------------------------------------------------------
# Visual context helper
# ---------------------------------------------------------------------------
def _build_visual_context(bp, tracking):
"""Extract structured visual context: protagonist, antagonist, key themes."""
lines = []
chars = bp.get('characters', [])
protagonist = next((c for c in chars if 'protagonist' in c.get('role', '').lower()), None)
if protagonist:
lines.append(f"PROTAGONIST: {protagonist.get('name')}{protagonist.get('description', '')[:200]}")
antagonist = next((c for c in chars if 'antagonist' in c.get('role', '').lower()), None)
if antagonist:
lines.append(f"ANTAGONIST: {antagonist.get('name')}{antagonist.get('description', '')[:150]}")
if tracking and tracking.get('characters'):
for name, data in list(tracking['characters'].items())[:2]:
desc = ', '.join(data.get('descriptors', []))[:120]
if desc:
lines.append(f"CHARACTER VISUAL ({name}): {desc}")
if tracking and tracking.get('events'):
recent = [e for e in tracking['events'][-3:] if isinstance(e, str)]
if recent:
lines.append(f"KEY THEMES/EVENTS: {'; '.join(recent)[:200]}")
return "\n".join(lines) if lines else ""
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def generate_cover(bp, folder, tracking=None, feedback=None, interactive=False):
    """Generate a book cover: design brief, art generation, then text layout.

    Pipeline:
      1. Optionally classify *feedback* (layout-only vs. new image).
      2. Ask the artist model for a design (font, colours, art prompt) and
         pre-validate the art prompt.
      3. Phase 1 - art loop: generate, evaluate, keep the best candidate and
         rewrite the prompt from the critique between attempts.
      4. Phase 2 - layout loop: ask for text coordinates, render title and
         author, evaluate, and retry with the critique appended.

    Files written into *folder*: ``cover_art_prompt.txt``,
    ``cover_art_attempt_N.png``, ``cover_art.png``,
    ``cover_layout_attempt_N.png`` and ``cover.png``.

    Args:
        bp: Blueprint dict; ``book_metadata`` and ``characters`` are read.
        folder: Output directory (also used for usage logging).
        tracking: Optional tracking data passed to the visual-context helper.
        feedback: Optional user feedback text on a previous cover.
        interactive: When true, each art attempt is opened in the system
            viewer and the user is asked to accept or reject it.

    Returns:
        None. Failures are logged and the function returns early.
    """
    if not HAS_PIL:
        utils.log("MARKETING", "Pillow not installed. Skipping cover.")
        return
    utils.log("MARKETING", "Generating cover...")
    meta = bp.get('book_metadata', {})
    orientation = meta.get('style', {}).get('page_orientation', 'Portrait')
    ar = "3:4"
    if orientation == "Landscape":
        ar = "4:3"
    elif orientation == "Square":
        ar = "1:1"
    visual_context = _build_visual_context(bp, tracking)

    regenerate_image = True
    design_instruction = ""
    # Existing art is reused unless the user supplied feedback.
    if os.path.exists(os.path.join(folder, "cover_art.png")) and not feedback:
        regenerate_image = False
    if feedback and feedback.strip():
        utils.log("MARKETING", f"Analysing feedback: '{feedback}'...")
        analysis_prompt = f"""
ROLE: Design Assistant
TASK: Analyse user feedback on a book cover.
FEEDBACK: "{feedback}"
DECISION:
1. Keep the background image; change only text/layout/colour → REGENERATE_LAYOUT
2. Create a completely new background image → REGENERATE_IMAGE
OUTPUT_FORMAT (JSON): {{"action": "REGENERATE_LAYOUT" or "REGENERATE_IMAGE", "instruction": "Specific instruction for the Art Director."}}
"""
        try:
            resp = ai_models.model_logic.generate_content(analysis_prompt)
            utils.log_usage(folder, ai_models.model_logic.name, resp.usage_metadata)
            decision = json.loads(utils.clean_json(resp.text))
            if decision.get('action') == 'REGENERATE_LAYOUT':
                regenerate_image = False
                utils.log("MARKETING", "Feedback: keeping image, regenerating layout only.")
            design_instruction = decision.get('instruction', feedback)
        except Exception:
            utils.log("MARKETING", "Feedback analysis failed. Defaulting to full regeneration.")

    # `or` also covers an explicit null genre, which would break .lower().
    genre = meta.get('genre') or 'Fiction'
    tone = meta.get('style', {}).get('tone', 'Balanced')
    genre_style_map = {
        'thriller': 'dark, cinematic, high-contrast photography style',
        'mystery': 'moody, atmospheric, noir-inspired painting',
        'romance': 'warm, painterly, soft-focus illustration',
        'fantasy': 'epic digital painting, rich colours, mythic scale',
        'science fiction': 'sharp digital art, cool palette, futuristic',
        'horror': 'unsettling dark atmospheric painting, desaturated',
        'historical fiction': 'classical oil painting style, period-accurate',
        'young adult': 'vibrant illustrated style, bold colours',
    }
    suggested_style = genre_style_map.get(genre.lower(), 'professional digital illustration')
    design_prompt = f"""
ROLE: Art Director
TASK: Design a professional book cover for an AI image generator.
BOOK:
- TITLE: {meta.get('title')}
- GENRE: {genre}
- TONE: {tone}
- SUGGESTED_VISUAL_STYLE: {suggested_style}
VISUAL_CONTEXT (characters and themes from the finished story — use these):
{visual_context if visual_context else "Use strong genre conventions."}
USER_FEEDBACK: {feedback if feedback else "None"}
DESIGN_INSTRUCTION: {design_instruction if design_instruction else "Create a compelling, genre-appropriate cover."}
COVER_ART_RULES:
- The art_prompt MUST produce an image with ABSOLUTELY NO text, letters, numbers, watermarks, UI elements, or logos. Text is overlaid separately.
- Describe a specific, memorable FOCAL POINT (e.g. protagonist mid-action, a symbolic object, a dramatic landscape).
- Use RULE OF THIRDS composition — preserve visual space at top AND bottom for title/author text overlay.
- Describe LIGHTING that reinforces the tone (e.g. "harsh neon backlight", "golden hour", "cold winter dawn").
- Specify the COLOUR PALETTE explicitly (e.g. "deep crimson and shadow-black", "soft rose gold and ivory cream").
- If characters are described in VISUAL_CONTEXT, their appearance MUST match those descriptions exactly.
- End the art_prompt with: "No text, no letters, no watermarks, no UI elements. {suggested_style} quality, 8k detail."
OUTPUT_FORMAT (JSON only, no markdown wrapper):
{{
"font_name": "One Google Font suited to {genre} (e.g. Cinzel for fantasy, Oswald for thriller, Playfair Display for romance)",
"primary_color": "#HexCode",
"text_color": "#HexCode (high contrast against primary_color)",
"art_prompt": "Detailed image generation prompt. Style → Focal point → Composition → Lighting → Colour palette → Characters (if any). End with the NO TEXT clause."
}}
"""
    try:
        response = ai_models.model_artist.generate_content(design_prompt)
        utils.log_usage(folder, ai_models.model_artist.name, response.usage_metadata)
        design = json.loads(utils.clean_json(response.text))
        # Tolerate a single-element array wrapper; anything else that is not
        # an object is reported here instead of crashing on .get() below.
        if isinstance(design, list):
            design = design[0] if design else {}
        if not isinstance(design, dict):
            raise ValueError("design response is not a JSON object")
    except Exception as e:
        utils.log("MARKETING", f"Cover design failed: {e}")
        return

    bg_color = design.get('primary_color', '#252570')
    art_prompt = design.get('art_prompt', f"Cover art for {meta.get('title')}")
    font_name = design.get('font_name') or 'Playfair Display'
    text_color = design.get('text_color') or '#FFFFFF'
    # Pre-validate and improve the art prompt before handing to Imagen
    art_prompt = validate_art_prompt(art_prompt, meta, ai_models.model_logic, folder)
    # Explicit UTF-8: prompts routinely contain non-ASCII punctuation that
    # the platform default encoding may be unable to write.
    with open(os.path.join(folder, "cover_art_prompt.txt"), "w", encoding="utf-8") as f:
        f.write(art_prompt)

    img = None
    width, height = 600, 900
    # -----------------------------------------------------------------------
    # Phase 1: Art generation loop (evaluate → critique → refine → retry)
    # -----------------------------------------------------------------------
    best_art_score = 0
    best_art_path = None
    current_art_prompt = art_prompt
    MAX_ART_ATTEMPTS = 3
    if regenerate_image:
        for attempt in range(1, MAX_ART_ATTEMPTS + 1):
            utils.log("MARKETING", f"Generating cover art (Attempt {attempt}/{MAX_ART_ATTEMPTS})...")
            attempt_path = os.path.join(folder, f"cover_art_attempt_{attempt}.png")
            gen_status = "success"
            try:
                if not ai_models.model_image:
                    raise ImportError("No image generation model available.")
                try:
                    result = ai_models.model_image.generate_images(
                        prompt=current_art_prompt, number_of_images=1, aspect_ratio=ar)
                except Exception as img_err:
                    err_lower = str(img_err).lower()
                    # Only resource/quota errors on Vertex trigger the
                    # fallback chain; everything else propagates.
                    if ai_models.HAS_VERTEX and ("resource" in err_lower or "quota" in err_lower):
                        try:
                            utils.log("MARKETING", "⚠️ Imagen 3 failed. Trying Imagen 3 Fast...")
                            fb = ai_models.VertexImageModel.from_pretrained("imagen-3.0-fast-generate-001")
                            result = fb.generate_images(prompt=current_art_prompt, number_of_images=1, aspect_ratio=ar)
                            gen_status = "success_fast"
                        except Exception:
                            utils.log("MARKETING", "⚠️ Imagen 3 Fast failed. Trying Imagen 2...")
                            fb = ai_models.VertexImageModel.from_pretrained("imagegeneration@006")
                            result = fb.generate_images(prompt=current_art_prompt, number_of_images=1, aspect_ratio=ar)
                            gen_status = "success_fallback"
                    else:
                        raise
                result.images[0].save(attempt_path)
                utils.log_usage(folder, "imagen", image_count=1)

                score, critique = evaluate_cover_art(
                    attempt_path, genre, meta.get('title', ''), ai_models.model_logic, folder)
                # A failed or non-numeric evaluation counts as 0.
                score = _score_or_zero(score)
                utils.log("MARKETING", f" -> Art Score: {score}/10. Critique: {critique}")
                utils.log_image_attempt(folder, "cover", current_art_prompt,
                                        f"cover_art_attempt_{attempt}.png", gen_status,
                                        score=score, critique=critique)

                if interactive:
                    # Opening the viewer is best-effort; the prompt below
                    # still works if no viewer is available.
                    try:
                        if os.name == 'nt':
                            os.startfile(attempt_path)
                        elif sys.platform == 'darwin':
                            subprocess.call(('open', attempt_path))
                        else:
                            subprocess.call(('xdg-open', attempt_path))
                    except Exception:
                        pass
                    from rich.prompt import Confirm
                    if Confirm.ask(f"Accept cover art attempt {attempt} (score {score})?", default=True):
                        best_art_path = attempt_path
                        best_art_score = score
                        break
                    utils.log("MARKETING", "User rejected art. Regenerating...")
                    continue

                # Track best image — prefer passing threshold; keep first usable as fallback
                if score >= ART_SCORE_PASSING and score > best_art_score:
                    best_art_score = score
                    best_art_path = attempt_path
                elif best_art_path is None and score > 0:
                    best_art_score = score
                    best_art_path = attempt_path
                if score >= ART_SCORE_AUTO_ACCEPT:
                    utils.log("MARKETING", " -> High-quality art accepted early.")
                    break

                # Critique-driven prompt refinement for next attempt
                if attempt < MAX_ART_ATTEMPTS and critique:
                    refine_req = f"""
ROLE: Art Director
TASK: Rewrite the image prompt to fix the critique below. Keep under 200 words.
CRITIQUE: {critique}
ORIGINAL_PROMPT: {current_art_prompt}
RULES:
- Preserve genre style, focal point, and colour palette unless explicitly criticised.
- If text/watermarks were visible: reinforce "absolutely no text, no letters, no watermarks."
- If anatomy was deformed: add "perfect anatomy, professional figure illustration."
- If blurry: add "tack-sharp focus, highly detailed."
OUTPUT_FORMAT (JSON): {{"improved_prompt": "..."}}
"""
                    try:
                        rr = ai_models.model_logic.generate_content(refine_req)
                        utils.log_usage(folder, ai_models.model_logic.name, rr.usage_metadata)
                        rd = json.loads(utils.clean_json(rr.text))
                        improved = str(rd.get('improved_prompt') or '').strip()
                        if len(improved) > 50:
                            current_art_prompt = improved
                            utils.log("MARKETING", " -> Art prompt refined for next attempt.")
                    except Exception as refine_err:
                        # Non-fatal: the next attempt reuses the current prompt.
                        utils.log("MARKETING", f" -> Art prompt refinement failed: {refine_err}. Reusing prompt.")
            except Exception as e:
                utils.log("MARKETING", f"Image generation attempt {attempt} failed: {e}")
                # Retrying cannot help once the quota is exhausted.
                if "quota" in str(e).lower():
                    break

        if best_art_path and os.path.exists(best_art_path):
            final_art_path = os.path.join(folder, "cover_art.png")
            if best_art_path != final_art_path:
                shutil.copy(best_art_path, final_art_path)
            # resize() returns a new in-memory image, so the file handle can
            # be released immediately.
            with Image.open(final_art_path) as art:
                img = art.resize((width, height)).convert("RGB")
            utils.log("MARKETING", f" -> Best art: {best_art_score}/10.")
        else:
            utils.log("MARKETING", "⚠️ No usable art generated. Falling back to solid colour cover.")
            img = Image.new('RGB', (width, height), color=bg_color)
            utils.log_image_attempt(folder, "cover", art_prompt, "cover.png", "fallback_solid")
    else:
        final_art_path = os.path.join(folder, "cover_art.png")
        if os.path.exists(final_art_path):
            utils.log("MARKETING", "Using existing cover art (layout update only).")
            with Image.open(final_art_path) as art:
                img = art.resize((width, height)).convert("RGB")
        else:
            utils.log("MARKETING", "Existing art not found. Using solid colour fallback.")
            img = Image.new('RGB', (width, height), color=bg_color)

    if img is None:
        utils.log("MARKETING", "Cover generation aborted — no image available.")
        return
    # Called for parity with the original flow; the returned path is unused
    # here because each text element resolves its own font when drawn.
    download_font(font_name)

    # -----------------------------------------------------------------------
    # Phase 2: Text layout loop (evaluate → critique → adjust → retry)
    # -----------------------------------------------------------------------
    best_layout_score = 0
    best_layout_path = None
    base_layout_prompt = f"""
ROLE: Graphic Designer
TASK: Determine precise text layout coordinates for a 600×900 book cover image.
BOOK:
- TITLE: {meta.get('title')}
- AUTHOR: {meta.get('author', 'Unknown')}
- GENRE: {genre}
- FONT: {font_name}
- TEXT_COLOR: {text_color}
PLACEMENT RULES:
- Title in top third OR bottom third (not centre — that obscures the focal art).
- Author name in the opposite zone, or just below the title.
- Font sizes: title ~60-80px, author ~28-36px for a 600px-wide canvas.
- Do NOT place text over faces or the primary focal point.
- Coordinates are the CENTER of the text block (x=300 is horizontal centre).
{f"USER FEEDBACK: {feedback}. Adjust placement/colour accordingly." if feedback else ""}
OUTPUT_FORMAT (JSON):
{{
"title": {{"x": Int, "y": Int, "font_size": Int, "font_name": "{font_name}", "color": "#Hex"}},
"author": {{"x": Int, "y": Int, "font_size": Int, "font_name": "{font_name}", "color": "#Hex"}}
}}
"""
    layout_prompt = base_layout_prompt
    MAX_LAYOUT_ATTEMPTS = 5
    for attempt in range(1, MAX_LAYOUT_ATTEMPTS + 1):
        utils.log("MARKETING", f"Designing text layout (Attempt {attempt}/{MAX_LAYOUT_ATTEMPTS})...")
        try:
            resp = ai_models.model_writer.generate_content([layout_prompt, img])
            utils.log_usage(folder, ai_models.model_writer.name, resp.usage_metadata)
            layout = json.loads(utils.clean_json(resp.text))
            if isinstance(layout, list):
                layout = layout[0] if layout else {}
            if not isinstance(layout, dict):
                raise ValueError("layout response is not a JSON object")
        except Exception as e:
            utils.log("MARKETING", f"Layout generation failed: {e}")
            continue

        img_copy = img.copy()
        draw = ImageDraw.Draw(img_copy)
        try:
            _draw_text_element(draw, layout.get('title'), meta.get('title'), font_name, text_color)
            _draw_text_element(draw, layout.get('author'), meta.get('author'), font_name, text_color)
        except Exception as e:
            # A malformed layout (e.g. an invalid colour) costs one attempt
            # rather than aborting the whole cover.
            utils.log("MARKETING", f"Layout rendering failed: {e}")
            continue

        attempt_path = os.path.join(folder, f"cover_layout_attempt_{attempt}.png")
        img_copy.save(attempt_path)
        score, critique = evaluate_cover_layout(
            attempt_path, meta.get('title', ''), meta.get('author', ''), genre, font_name,
            ai_models.model_writer, folder
        )
        score = _score_or_zero(score)
        utils.log("MARKETING", f" -> Layout Score: {score}/10. Critique: {critique}")
        if score > best_layout_score:
            best_layout_score = score
            best_layout_path = attempt_path
        if score >= LAYOUT_SCORE_PASSING:
            # The comparison operator was missing, which printed the score
            # and the threshold fused together (e.g. "score 87").
            utils.log("MARKETING", f" -> Layout accepted (score {score} >= {LAYOUT_SCORE_PASSING}).")
            break
        if attempt < MAX_LAYOUT_ATTEMPTS:
            # Always rebuild from the base prompt so critiques do not pile up.
            layout_prompt = (base_layout_prompt
                             + f"\n\nCRITIQUE OF ATTEMPT {attempt}: {critique}\n"
                             + "Adjust coordinates, font_size, or color to fix these issues exactly.")

    if best_layout_path:
        shutil.copy(best_layout_path, os.path.join(folder, "cover.png"))
        utils.log("MARKETING", f"Cover saved. Best layout score: {best_layout_score}/10.")
    else:
        utils.log("MARKETING", "⚠️ No layout produced. Cover not saved.")


def _score_or_zero(raw_score):
    """Return *raw_score* as an int, or 0 when it is missing or non-numeric."""
    try:
        return int(float(raw_score))
    except (TypeError, ValueError, OverflowError):
        return 0


def _numeric_or(value, default):
    """Return *value* if it is (or parses as) a number, otherwise *default*."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _draw_text_element(draw, elem, text_override, default_font_name, default_color):
    """Draw one wrapped, centred text block described by a layout element.

    *elem* is the layout entry for the element (a dict, or a list whose first
    item is used) with optional ``text``, ``font_name``, ``font_size``, ``x``,
    ``y`` and ``color`` keys; ``x``/``y`` are the centre of the text block.
    Nothing is drawn when the element or its text is missing.
    """
    if not elem:
        return
    if isinstance(elem, list):
        elem = elem[0]
    if not isinstance(elem, dict):
        return
    text = text_override if text_override else elem.get('text')
    if not text:
        return

    font_file = download_font(elem.get('font_name') or default_font_name)
    try:
        size = int(_numeric_or(elem.get('font_size'), 40))
        fnt = ImageFont.truetype(font_file, size) if font_file else ImageFont.load_default()
    except Exception:
        # Unreadable font file or invalid size: fall back to the default font.
        fnt = ImageFont.load_default()

    x = _numeric_or(elem.get('x'), 300)
    y = _numeric_or(elem.get('y'), 450)
    color = elem.get('color') or default_color

    # Estimate characters per line from the width of "A" against a 550px
    # text column; textwrap requires a width of at least 1.
    avg_w = fnt.getlength("A")
    wrap_w = max(1, int(550 / avg_w)) if avg_w > 0 else 20
    wrapped = textwrap.wrap(text, width=wrap_w)

    # Measure each line once; 10px of leading is added per line.
    boxes = [draw.textbbox((0, 0), ln, font=fnt) for ln in wrapped]
    line_heights = [box[3] - box[1] + 10 for box in boxes]
    current_y = y - (sum(line_heights) // 2)
    for ln, box, line_h in zip(wrapped, boxes, line_heights):
        # Centre each line horizontally on x.
        lx = x - ((box[2] - box[0]) / 2)
        draw.text((lx, current_y), ln, font=fnt, fill=color)
        current_y += line_h