Auto-commit: v2.14 — Stuck job robustness (heartbeat, retry, stale watcher, granular logging)

- web/db.py: Add last_heartbeat column to Run model
- core/utils.py: Add set_heartbeat_callback() and send_heartbeat()
- web/tasks.py: Add _robust_update_run_status() with 5-retry exponential backoff;
  add db_heartbeat_callback(); remove every bare `except: pass` around DB status updates;
  set start_time and last_heartbeat when marking a run as 'running'
- web/app.py: Add last_heartbeat column migration; add _stale_job_watcher()
  background thread (checks every 5 min, 15-min heartbeat threshold, 2-hr start_time threshold)
- cli/engine.py: Add phase-level logging banners and try/except wrappers (log the
  error, then re-raise) in process_book(); call utils.send_heartbeat() after each
  chapter save; add start/finish logging in run_generation()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-21 19:00:29 -05:00
parent 97efd51fd5
commit 81340a18ea
6 changed files with 275 additions and 122 deletions

View File

@@ -31,26 +31,31 @@ def process_book(bp, folder, context="", resume=False, interactive=False):
bp_path = os.path.join(folder, "blueprint_initial.json")
t_step = time.time()
utils.update_progress(5)
if resume and os.path.exists(bp_path):
utils.log("RESUME", "Loading existing blueprint...")
saved_bp = utils.load_json(bp_path)
if saved_bp:
if 'book_metadata' in bp and 'book_metadata' in saved_bp:
for k in ['title', 'author', 'genre', 'target_audience', 'style', 'author_bio', 'author_details']:
if k in bp['book_metadata']:
saved_bp['book_metadata'][k] = bp['book_metadata'][k]
if 'series_metadata' in bp:
saved_bp['series_metadata'] = bp['series_metadata']
bp = saved_bp
utils.log("SYSTEM", "--- Phase: Blueprint ---")
try:
if resume and os.path.exists(bp_path):
utils.log("RESUME", "Loading existing blueprint...")
saved_bp = utils.load_json(bp_path)
if saved_bp:
if 'book_metadata' in bp and 'book_metadata' in saved_bp:
for k in ['title', 'author', 'genre', 'target_audience', 'style', 'author_bio', 'author_details']:
if k in bp['book_metadata']:
saved_bp['book_metadata'][k] = bp['book_metadata'][k]
if 'series_metadata' in bp:
saved_bp['series_metadata'] = bp['series_metadata']
bp = saved_bp
with open(bp_path, "w") as f: json.dump(bp, f, indent=2)
else:
bp = planner.enrich(bp, folder, context)
with open(bp_path, "w") as f: json.dump(bp, f, indent=2)
else:
bp = planner.enrich(bp, folder, context)
with open(bp_path, "w") as f: json.dump(bp, f, indent=2)
# Ensure Persona Exists (Auto-create if missing)
if 'author_details' not in bp['book_metadata'] or not bp['book_metadata']['author_details']:
bp['book_metadata']['author_details'] = style_persona.create_initial_persona(bp, folder)
with open(bp_path, "w") as f: json.dump(bp, f, indent=2)
# Ensure Persona Exists (Auto-create if missing)
if 'author_details' not in bp['book_metadata'] or not bp['book_metadata']['author_details']:
bp['book_metadata']['author_details'] = style_persona.create_initial_persona(bp, folder)
with open(bp_path, "w") as f: json.dump(bp, f, indent=2)
except Exception as _e:
utils.log("ERROR", f"Blueprint phase failed: {type(_e).__name__}: {_e}")
raise
utils.log("TIMING", f"Blueprint Phase: {time.time() - t_step:.1f}s")
@@ -58,29 +63,40 @@ def process_book(bp, folder, context="", resume=False, interactive=False):
events_path = os.path.join(folder, "events.json")
t_step = time.time()
utils.update_progress(10)
if resume and os.path.exists(events_path):
utils.log("RESUME", "Loading existing events...")
events = utils.load_json(events_path)
else:
events = planner.plan_structure(bp, folder)
depth = bp['length_settings']['depth']
target_chaps = bp['length_settings']['chapters']
for d in range(1, depth+1):
events = planner.expand(events, d, target_chaps, bp, folder)
time.sleep(1)
with open(events_path, "w") as f: json.dump(events, f, indent=2)
utils.log("SYSTEM", "--- Phase: Story Structure & Events ---")
try:
if resume and os.path.exists(events_path):
utils.log("RESUME", "Loading existing events...")
events = utils.load_json(events_path)
else:
events = planner.plan_structure(bp, folder)
depth = bp['length_settings']['depth']
target_chaps = bp['length_settings']['chapters']
for d in range(1, depth+1):
utils.log("SYSTEM", f" Expanding story structure depth {d}/{depth}...")
events = planner.expand(events, d, target_chaps, bp, folder)
time.sleep(1)
with open(events_path, "w") as f: json.dump(events, f, indent=2)
except Exception as _e:
utils.log("ERROR", f"Events/Structure phase failed: {type(_e).__name__}: {_e}")
raise
utils.log("TIMING", f"Structure & Expansion: {time.time() - t_step:.1f}s")
# 4. Chapter Plan
chapters_path = os.path.join(folder, "chapters.json")
t_step = time.time()
utils.update_progress(15)
if resume and os.path.exists(chapters_path):
utils.log("RESUME", "Loading existing chapter plan...")
chapters = utils.load_json(chapters_path)
else:
chapters = planner.create_chapter_plan(events, bp, folder)
with open(chapters_path, "w") as f: json.dump(chapters, f, indent=2)
utils.log("SYSTEM", "--- Phase: Chapter Planning ---")
try:
if resume and os.path.exists(chapters_path):
utils.log("RESUME", "Loading existing chapter plan...")
chapters = utils.load_json(chapters_path)
else:
chapters = planner.create_chapter_plan(events, bp, folder)
with open(chapters_path, "w") as f: json.dump(chapters, f, indent=2)
except Exception as _e:
utils.log("ERROR", f"Chapter planning phase failed: {type(_e).__name__}: {_e}")
raise
utils.log("TIMING", f"Chapter Planning: {time.time() - t_step:.1f}s")
# 5. Writing Loop
@@ -126,6 +142,7 @@ def process_book(bp, folder, context="", resume=False, interactive=False):
summary = resp_sum.text
except: summary = "The story continues."
utils.log("SYSTEM", f"--- Phase: Writing ({len(chapters)} chapters planned) ---")
t_step = time.time()
session_chapters = 0
session_time = 0
@@ -222,6 +239,7 @@ def process_book(bp, folder, context="", resume=False, interactive=False):
ms.append({'num': ch['chapter_number'], 'title': ch['title'], 'pov_character': ch.get('pov_character'), 'content': txt})
with open(ms_path, "w") as f: json.dump(ms, f, indent=2)
utils.send_heartbeat() # Signal that the task is still alive
# Update Tracking
tracking = bible_tracker.update_tracking(folder, ch['chapter_number'], txt, tracking)
@@ -284,21 +302,29 @@ def process_book(bp, folder, context="", resume=False, interactive=False):
utils.log("TIMING", f"Writing Phase: {time.time() - t_step:.1f}s")
# Harvest
# Post-Processing
t_step = time.time()
utils.update_progress(92)
bp = bible_tracker.harvest_metadata(bp, folder, ms)
with open(os.path.join(folder, "final_blueprint.json"), "w") as f: json.dump(bp, f, indent=2)
utils.log("SYSTEM", "--- Phase: Post-Processing (Harvest, Cover, Export) ---")
try:
utils.update_progress(92)
utils.log("SYSTEM", " Harvesting metadata from manuscript...")
bp = bible_tracker.harvest_metadata(bp, folder, ms)
with open(os.path.join(folder, "final_blueprint.json"), "w") as f: json.dump(bp, f, indent=2)
# Create Assets
utils.update_progress(95)
marketing_assets.create_marketing_assets(bp, folder, tracking, interactive=interactive)
utils.update_progress(95)
utils.log("SYSTEM", " Generating cover and marketing assets...")
marketing_assets.create_marketing_assets(bp, folder, tracking, interactive=interactive)
# Update Persona
style_persona.update_persona_sample(bp, folder)
utils.log("SYSTEM", " Updating author persona sample...")
style_persona.update_persona_sample(bp, folder)
utils.update_progress(98)
utils.log("SYSTEM", " Compiling final export files...")
exporter.compile_files(bp, ms, folder)
except Exception as _e:
utils.log("ERROR", f"Post-processing phase failed: {type(_e).__name__}: {_e}")
raise
utils.update_progress(98)
exporter.compile_files(bp, ms, folder)
utils.log("TIMING", f"Post-Processing: {time.time() - t_step:.1f}s")
utils.log("SYSTEM", f"Book Finished. Total Time: {time.time() - total_start:.1f}s")
@@ -307,16 +333,17 @@ def process_book(bp, folder, context="", resume=False, interactive=False):
def run_generation(target=None, specific_run_id=None, interactive=False):
utils.log("SYSTEM", "=== run_generation: Initialising AI models ===")
ai_setup.init_models()
if not target: target = config.DEFAULT_BLUEPRINT
data = utils.load_json(target)
if not data:
utils.log("SYSTEM", f"Could not load {target}")
utils.log("ERROR", f"Could not load bible/target: {target}")
return
utils.log("SYSTEM", "Starting Series Generation...")
utils.log("SYSTEM", f"=== Starting Series Generation: {data.get('project_metadata', {}).get('title', 'Untitled')} ===")
project_dir = os.path.dirname(os.path.abspath(target))
runs_base = os.path.join(project_dir, "runs")
@@ -386,7 +413,13 @@ def run_generation(target=None, specific_run_id=None, interactive=False):
book_folder = os.path.join(run_dir, f"Book_{book.get('book_number', i+1)}_{safe_title}")
os.makedirs(book_folder, exist_ok=True)
process_book(bp, book_folder, context=previous_context, resume=resume_mode, interactive=interactive)
utils.log("SYSTEM", f"--- Starting process_book for '{book.get('title')}' in {book_folder} ---")
try:
process_book(bp, book_folder, context=previous_context, resume=resume_mode, interactive=interactive)
except Exception as _e:
utils.log("ERROR", f"process_book failed for Book {book.get('book_number')}: {type(_e).__name__}: {_e}")
raise
utils.log("SYSTEM", f"--- Finished process_book for '{book.get('title')}' ---")
final_bp_path = os.path.join(book_folder, "final_blueprint.json")
if os.path.exists(final_bp_path):