Auto-commit: v2.9 — Fix background task hangs (OAuth headless guard, SQLite timeouts, log touch)
- ai/setup.py: Added threading import; OAuth block now detects background/headless threads and skips run_local_server to prevent indefinite blocking. Logs a clear warning and falls back to ADC for Vertex AI. Token file only written when creds are not None.
- web/tasks.py: All sqlite3.connect() calls now use timeout=30, check_same_thread=False. OperationalError on the initial status update is caught and logged via utils.log. generate_book_task now touches initial_log immediately so the UI polling endpoint always finds an existing file even if the worker crashes on the next line.
- ai_blueprint.md: Bumped to v2.9; Section 12.D sub-items 1-3 marked ✅; item 13 added to summary.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
24
ai/setup.py
24
ai/setup.py
@@ -2,6 +2,7 @@ import os
|
||||
import json
|
||||
import time
|
||||
import warnings
|
||||
import threading
|
||||
import google.generativeai as genai
|
||||
from core import config, utils
|
||||
from ai import models
|
||||
@@ -256,19 +257,30 @@ def init_models(force=False):
|
||||
if os.path.exists(token_path):
|
||||
creds = models.Credentials.from_authorized_user_file(token_path, SCOPES)
|
||||
|
||||
_is_headless = threading.current_thread() is not threading.main_thread()
|
||||
|
||||
if not creds or not creds.valid:
|
||||
if creds and creds.expired and creds.refresh_token:
|
||||
try:
|
||||
creds.refresh(models.Request())
|
||||
except Exception:
|
||||
utils.log("SYSTEM", "Token refresh failed. Re-authenticating...")
|
||||
if _is_headless:
|
||||
utils.log("SYSTEM", "⚠️ Token refresh failed and cannot re-authenticate in a background/headless thread. Vertex AI will use ADC or be unavailable.")
|
||||
creds = None
|
||||
else:
|
||||
utils.log("SYSTEM", "Token refresh failed. Re-authenticating...")
|
||||
flow = models.InstalledAppFlow.from_client_secrets_file(gac, SCOPES)
|
||||
creds = flow.run_local_server(port=0)
|
||||
else:
|
||||
if _is_headless:
|
||||
utils.log("SYSTEM", "⚠️ OAuth Client ID requires browser login but running in headless/background mode. Skipping interactive auth. Use a Service Account key for Vertex AI in background tasks.")
|
||||
creds = None
|
||||
else:
|
||||
utils.log("SYSTEM", "OAuth Client ID detected. Launching browser to authenticate...")
|
||||
flow = models.InstalledAppFlow.from_client_secrets_file(gac, SCOPES)
|
||||
creds = flow.run_local_server(port=0)
|
||||
else:
|
||||
utils.log("SYSTEM", "OAuth Client ID detected. Launching browser to authenticate...")
|
||||
flow = models.InstalledAppFlow.from_client_secrets_file(gac, SCOPES)
|
||||
creds = flow.run_local_server(port=0)
|
||||
with open(token_path, 'w') as token: token.write(creds.to_json())
|
||||
if creds:
|
||||
with open(token_path, 'w') as token: token.write(creds.to_json())
|
||||
|
||||
utils.log("SYSTEM", "✅ Authenticated via OAuth Client ID.")
|
||||
except Exception as e:
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# AI Context Optimization Blueprint (v2.8)
|
||||
# AI Context Optimization Blueprint (v2.9)
|
||||
|
||||
This blueprint outlines architectural improvements for how AI context is managed during the writing process. The goal is to provide the AI (Claude/Gemini) with **better, highly-targeted context upfront**, which will dramatically improve first-draft quality and reduce the reliance on expensive, time-consuming quality checks and rewrites (currently up to 5 attempts).
|
||||
|
||||
@@ -147,6 +147,16 @@ Three bugs combined to produce a blank page or silent failure when creating a ne
|
||||
5. ✅ `web/routes/project.py` (`project_setup_wizard`): When `model_logic` was `None`, the route silently redirected to the dashboard with a flash the user missed. Now renders the setup form with a complete default suggestions dict (all fields populated, lists as `[]`) and a visible `"warning"` flash so the user can fill in details manually. *(Implemented v2.8)*
|
||||
6. ✅ `web/routes/project.py` (`create_project_final`): `planner.enrich()` was called with the full project bible dict. `enrich()` reads `bp.get('manual_instruction')` from the top level (got `'A generic story'` fallback — the real concept was in `bible['books'][0]['manual_instruction']`), and wrote enriched data into a new `book_metadata` key instead of the bible's `books[0]`. Fixed to build a proper per-book blueprint, call enrich, and merge `characters`, `plot_beats`, and `structure_prompt` back into the correct bible locations. *(Implemented v2.8)*
|
||||
|
||||
### D. "Waiting for logs" / "Preparing environment" Background Task Hangs
|
||||
The UI gets stuck indefinitely because the background Huey worker thread hangs before emitting the first "Starting Job" log, or fails to connect to the database.
|
||||
|
||||
**Places that impact this and their fixes:**
|
||||
1. ✅ **OAuth Browser Prompt in Background Thread**: `ai/setup.py` — Added `import threading`; the OAuth block now checks `threading.current_thread() is not threading.main_thread()`. When that check is true (the code is running in a non-main thread, which is treated as headless/background mode), `run_local_server` is skipped, `creds` is set to `None`, and a clear warning is logged. Vertex AI then falls back to ADC (or is unavailable if ADC is not configured). The token file is only written if `creds` is not `None`. *(Implemented v2.9)*
|
||||
|
||||
2. ✅ **SQLite Database Locking Timeout**: `web/tasks.py` — All `sqlite3.connect()` calls now use `timeout=30, check_same_thread=False`. The initial status-update `OperationalError` is caught and logged via `utils.log` so it appears in the log file rather than silently disappearing. *(Implemented v2.9)*
|
||||
|
||||
3. ✅ **Missing Initial Log File Creation**: `web/tasks.py` (`generate_book_task`) — The `initial_log` file is now touched (opened in append mode with `open(…, 'a')` and immediately closed) right after its path is constructed and before `utils.set_log_file()` is called, guaranteeing that the file exists for UI polling even if the worker crashes on the very next line. *(Implemented v2.9)*
|
||||
|
||||
## Summary of Actionable Changes for Implementation Mode:
|
||||
1. ✅ Modify `writer.py` to filter `chars_for_writer` based on characters named in `beats`. *(Implemented in v1.5.0)*
|
||||
2. ✅ Modify `writer.py` `prev_content` logic to extract the *tail* of the chapter, not a blind slice. *(Implemented in v1.5.0 via `utils.truncate_to_tokens` tail logic)*
|
||||
@@ -160,3 +170,4 @@ Three bugs combined to produce a blank page or silent failure when creating a ne
|
||||
10. ✅ **(v2.6)** "Redo Book" form in `consistency_report.html` + `revise_book` route in `run.py` that creates a new run with the instruction applied as bible feedback. *(Implemented v2.6)*
|
||||
11. ✅ **(v2.7)** Series Continuity Fix: `series_metadata` (is_series, series_title, book_number, total_books) injected as `SERIES_CONTEXT` into `story/planner.py` (`enrich`, `plan_structure`), `story/writer.py` (`write_chapter`), and `story/editor.py` (`evaluate_chapter_quality`) prompts with position-aware guidance per book number. *(Implemented v2.7)*
|
||||
12. ✅ **(v2.8)** Infrastructure & UI Bug Fixes: API timeouts (180s generation, 30s list_models) in `ai/models.py` + `ai/setup.py`; Huey consumer moved to module level with reloader guard in `web/app.py`; Jinja2 `UndefinedError` fix for `tropes`/`formatting_rules` in `project_setup.html`; `project_setup_wizard` now renders form instead of silent redirect when models fail; `create_project_final` `enrich()` call fixed to use correct per-book blueprint structure. *(Implemented v2.8)*
|
||||
13. ✅ **(v2.9)** Background Task Hang Fixes: OAuth headless guard in `ai/setup.py` (skips `run_local_server` in non-main threads, logs warning, falls back to ADC); SQLite `timeout=30, check_same_thread=False` on all connections in `web/tasks.py`; initial log file touched immediately in `generate_book_task` so UI polling always finds an existing file (it may still be empty until the first log line is written). *(Implemented v2.9)*
|
||||
|
||||
22
web/tasks.py
22
web/tasks.py
@@ -20,7 +20,7 @@ def db_log_callback(db_path, run_id, phase, msg):
|
||||
"""Writes log entry directly to SQLite to avoid Flask Context issues in threads."""
|
||||
for _ in range(5):
|
||||
try:
|
||||
with sqlite3.connect(db_path, timeout=5) as conn:
|
||||
with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn:
|
||||
conn.execute("INSERT INTO log_entry (run_id, timestamp, phase, message) VALUES (?, ?, ?, ?)",
|
||||
(run_id, datetime.utcnow(), phase, str(msg)))
|
||||
break
|
||||
@@ -32,7 +32,7 @@ def db_progress_callback(db_path, run_id, percent):
|
||||
"""Updates run progress in SQLite."""
|
||||
for _ in range(5):
|
||||
try:
|
||||
with sqlite3.connect(db_path, timeout=5) as conn:
|
||||
with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn:
|
||||
conn.execute("UPDATE run SET progress = ? WHERE id = ?", (percent, run_id))
|
||||
break
|
||||
except sqlite3.OperationalError: time.sleep(0.1)
|
||||
@@ -48,6 +48,15 @@ def generate_book_task(run_id, project_path, bible_path, allow_copy=True, feedba
|
||||
|
||||
# Log to project root initially until run folder is created by engine
|
||||
initial_log = os.path.join(project_path, log_filename)
|
||||
|
||||
# Touch the file immediately so the UI has something to poll even if the
|
||||
# worker crashes before the first utils.log() call.
|
||||
try:
|
||||
with open(initial_log, 'a', encoding='utf-8') as _f:
|
||||
pass
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
utils.set_log_file(initial_log)
|
||||
|
||||
# Hook up Database Logging
|
||||
@@ -57,9 +66,12 @@ def generate_book_task(run_id, project_path, bible_path, allow_copy=True, feedba
|
||||
|
||||
# Set Status to Running
|
||||
try:
|
||||
with sqlite3.connect(db_path, timeout=10) as conn:
|
||||
with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn:
|
||||
conn.execute("UPDATE run SET status = 'running' WHERE id = ?", (run_id,))
|
||||
except: pass
|
||||
except sqlite3.OperationalError as e:
|
||||
utils.log("SYSTEM", f"⚠️ Database locked when setting run status (run {run_id}): {e}")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
utils.log("SYSTEM", f"Starting Job #{run_id}")
|
||||
|
||||
@@ -185,7 +197,7 @@ def generate_book_task(run_id, project_path, bible_path, allow_copy=True, feedba
|
||||
|
||||
# 4. Update Database with Final Status
|
||||
try:
|
||||
with sqlite3.connect(db_path, timeout=10) as conn:
|
||||
with sqlite3.connect(db_path, timeout=30, check_same_thread=False) as conn:
|
||||
conn.execute("UPDATE run SET status = ?, cost = ?, end_time = ?, log_file = ?, progress = 100 WHERE id = ?",
|
||||
(status, total_cost, datetime.utcnow(), final_log_path, run_id))
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user