From 1f01fedf00ec18a36a9e5343b2d171c31b72e524 Mon Sep 17 00:00:00 2001 From: Mike Wichers Date: Sat, 21 Feb 2026 10:50:00 -0500 Subject: [PATCH] =?UTF-8?q?Auto-commit:=20v2.9=20=E2=80=94=20Fix=20backgro?= =?UTF-8?q?und=20task=20hangs=20(OAuth=20headless=20guard,=20SQLite=20time?= =?UTF-8?q?outs,=20log=20touch)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ai/setup.py: Added threading import; OAuth block now detects background/headless threads and skips run_local_server to prevent indefinite blocking. Logs a clear warning and falls back to ADC for Vertex AI. Token file only written when creds are not None. - web/tasks.py: All sqlite3.connect() calls now use timeout=30, check_same_thread=False. OperationalError on the initial status update is caught and logged via utils.log. generate_book_task now touches initial_log immediately so the UI polling endpoint always finds an existing file even if the worker crashes on the next line. - ai_blueprint.md: Bumped to v2.9; Section 12.D sub-items 1-3 marked ✅; item 13 added to summary. Co-Authored-By: Claude Sonnet 4.6 --- ai/setup.py | 24 ++++++++++++++++++------ ai_blueprint.md | 13 ++++++++++++- web/tasks.py | 22 +++++++++++++++++----- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/ai/setup.py b/ai/setup.py index a46fe35..19c8667 100644 --- a/ai/setup.py +++ b/ai/setup.py @@ -2,6 +2,7 @@ import os import json import time import warnings +import threading import google.generativeai as genai from core import config, utils from ai import models @@ -256,19 +257,30 @@ def init_models(force=False): if os.path.exists(token_path): creds = models.Credentials.from_authorized_user_file(token_path, SCOPES) + _is_headless = threading.current_thread() is not threading.main_thread() + if not creds or not creds.valid: if creds and creds.expired and creds.refresh_token: try: creds.refresh(models.Request()) except Exception: - utils.log("SYSTEM", "Token refresh failed. 
Re-authenticating...") + if _is_headless: + utils.log("SYSTEM", "⚠️ Token refresh failed and cannot re-authenticate in a background/headless thread. Vertex AI will use ADC or be unavailable.") + creds = None + else: + utils.log("SYSTEM", "Token refresh failed. Re-authenticating...") + flow = models.InstalledAppFlow.from_client_secrets_file(gac, SCOPES) + creds = flow.run_local_server(port=0) + else: + if _is_headless: + utils.log("SYSTEM", "⚠️ OAuth Client ID requires browser login but running in headless/background mode. Skipping interactive auth. Use a Service Account key for Vertex AI in background tasks.") + creds = None + else: + utils.log("SYSTEM", "OAuth Client ID detected. Launching browser to authenticate...") flow = models.InstalledAppFlow.from_client_secrets_file(gac, SCOPES) creds = flow.run_local_server(port=0) - else: - utils.log("SYSTEM", "OAuth Client ID detected. Launching browser to authenticate...") - flow = models.InstalledAppFlow.from_client_secrets_file(gac, SCOPES) - creds = flow.run_local_server(port=0) - with open(token_path, 'w') as token: token.write(creds.to_json()) + if creds: + with open(token_path, 'w') as token: token.write(creds.to_json()) utils.log("SYSTEM", "✅ Authenticated via OAuth Client ID.") except Exception as e: diff --git a/ai_blueprint.md b/ai_blueprint.md index 92e1a81..308157d 100644 --- a/ai_blueprint.md +++ b/ai_blueprint.md @@ -1,4 +1,4 @@ -# AI Context Optimization Blueprint (v2.8) +# AI Context Optimization Blueprint (v2.9) This blueprint outlines architectural improvements for how AI context is managed during the writing process. The goal is to provide the AI (Claude/Gemini) with **better, highly-targeted context upfront**, which will dramatically improve first-draft quality and reduce the reliance on expensive, time-consuming quality checks and rewrites (currently up to 5 attempts). @@ -147,6 +147,16 @@ Three bugs combined to produce a blank page or silent failure when creating a ne 5. 
✅ `web/routes/project.py` (`project_setup_wizard`): When `model_logic` was `None`, the route silently redirected to the dashboard with a flash the user missed. Now renders the setup form with a complete default suggestions dict (all fields populated, lists as `[]`) and a visible `"warning"` flash so the user can fill in details manually. *(Implemented v2.8)* 6. ✅ `web/routes/project.py` (`create_project_final`): `planner.enrich()` was called with the full project bible dict. `enrich()` reads `bp.get('manual_instruction')` from the top level (got `'A generic story'` fallback — the real concept was in `bible['books'][0]['manual_instruction']`), and wrote enriched data into a new `book_metadata` key instead of the bible's `books[0]`. Fixed to build a proper per-book blueprint, call enrich, and merge `characters`, `plot_beats`, and `structure_prompt` back into the correct bible locations. *(Implemented v2.8)* +### D. "Waiting for logs" / "Preparing environment" Background Task Hangs +The UI gets stuck indefinitely because the background Huey worker thread hangs before emitting the first "Starting Job" log, or fails to connect to the database. + +**Places that impact this and their fixes:** +1. ✅ **OAuth Browser Prompt in Background Thread**: `ai/setup.py` — Added `import threading`; the OAuth block now checks `threading.current_thread() is not threading.main_thread()`. If running headlessly, `run_local_server` is skipped, `creds` is set to `None`, and a clear warning is logged. Vertex AI falls back to ADC. Token is only written if `creds` is not `None`. *(Implemented v2.9)* + +2. ✅ **SQLite Database Locking Timeout**: `web/tasks.py` — All `sqlite3.connect()` calls now use `timeout=30, check_same_thread=False`. The initial status-update `OperationalError` is caught and logged via `utils.log` so it appears in the log file rather than silently disappearing. *(Implemented v2.9)* + +3. 
✅ **Missing Initial Log File Creation**: `web/tasks.py` `generate_book_task` — The `initial_log` path is now `open(…, 'a')`-touched immediately after construction and before `utils.set_log_file()`, guaranteeing the file exists for UI polling even if the worker crashes on the very next line. *(Implemented v2.9)* + ## Summary of Actionable Changes for Implementation Mode: 1. ✅ Modify `writer.py` to filter `chars_for_writer` based on characters named in `beats`. *(Implemented in v1.5.0)* 2. ✅ Modify `writer.py` `prev_content` logic to extract the *tail* of the chapter, not a blind slice. *(Implemented in v1.5.0 via `utils.truncate_to_tokens` tail logic)* @@ -160,3 +170,4 @@ Three bugs combined to produce a blank page or silent failure when creating a ne 10. ✅ **(v2.6)** "Redo Book" form in `consistency_report.html` + `revise_book` route in `run.py` that creates a new run with the instruction applied as bible feedback. *(Implemented v2.6)* 11. ✅ **(v2.7)** Series Continuity Fix: `series_metadata` (is_series, series_title, book_number, total_books) injected as `SERIES_CONTEXT` into `story/planner.py` (`enrich`, `plan_structure`), `story/writer.py` (`write_chapter`), and `story/editor.py` (`evaluate_chapter_quality`) prompts with position-aware guidance per book number. *(Implemented v2.7)* 12. ✅ **(v2.8)** Infrastructure & UI Bug Fixes: API timeouts (180s generation, 30s list_models) in `ai/models.py` + `ai/setup.py`; Huey consumer moved to module level with reloader guard in `web/app.py`; Jinja2 `UndefinedError` fix for `tropes`/`formatting_rules` in `project_setup.html`; `project_setup_wizard` now renders form instead of silent redirect when models fail; `create_project_final` `enrich()` call fixed to use correct per-book blueprint structure. *(Implemented v2.8)* +13. 
✅ **(v2.9)** Background Task Hang Fixes: OAuth headless guard in `ai/setup.py` (skips `run_local_server` in non-main threads, logs warning, falls back to ADC); SQLite `timeout=30, check_same_thread=False` on all connections in `web/tasks.py`; initial log file touched immediately in `generate_book_task` so UI polling always finds an existing (possibly still empty) log file. *(Implemented v2.9)* diff --git a/web/tasks.py b/web/tasks.py index 294f04b..884e38a 100644 --- a/web/tasks.py +++ b/web/tasks.py @@ -20,7 +20,7 @@ def db_log_callback(db_path, run_id, phase, msg): """Writes log entry directly to SQLite to avoid Flask Context issues in threads.""" for _ in range(5): try: - with sqlite3.connect(db_path, timeout=5) as conn: + with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn: conn.execute("INSERT INTO log_entry (run_id, timestamp, phase, message) VALUES (?, ?, ?, ?)", (run_id, datetime.utcnow(), phase, str(msg))) break @@ -32,7 +32,7 @@ def db_progress_callback(db_path, run_id, percent): """Updates run progress in SQLite.""" for _ in range(5): try: - with sqlite3.connect(db_path, timeout=5) as conn: + with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn: conn.execute("UPDATE run SET progress = ? WHERE id = ?", (percent, run_id)) break except sqlite3.OperationalError: time.sleep(0.1) @@ -48,6 +48,15 @@ def generate_book_task(run_id, project_path, bible_path, allow_copy=True, feedba # Log to project root initially until run folder is created by engine initial_log = os.path.join(project_path, log_filename) + + # Touch the file immediately so the UI has something to poll even if the + # worker crashes before the first utils.log() call. 
+ try: + with open(initial_log, 'a', encoding='utf-8') as _f: + pass + except Exception: + pass + utils.set_log_file(initial_log) # Hook up Database Logging @@ -57,9 +66,12 @@ def generate_book_task(run_id, project_path, bible_path, allow_copy=True, feedba # Set Status to Running try: - with sqlite3.connect(db_path, timeout=10) as conn: + with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn: conn.execute("UPDATE run SET status = 'running' WHERE id = ?", (run_id,)) - except: pass + except sqlite3.OperationalError as e: + utils.log("SYSTEM", f"⚠️ Database locked when setting run status (run {run_id}): {e}") + except Exception: + pass utils.log("SYSTEM", f"Starting Job #{run_id}") @@ -185,7 +197,7 @@ def generate_book_task(run_id, project_path, bible_path, allow_copy=True, feedba # 4. Update Database with Final Status try: - with sqlite3.connect(db_path, timeout=10) as conn: + with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn: conn.execute("UPDATE run SET status = ?, cost = ?, end_time = ?, log_file = ?, progress = 100 WHERE id = ?", (status, total_cost, datetime.utcnow(), final_log_path, run_id)) except Exception as e: