From 1f799227d9faa4a8bdc5e7a6ea4ee94f81ce4aff Mon Sep 17 00:00:00 2001
From: Mike Wichers <thethreemagi@outlook.com>
Date: Sat, 21 Feb 2026 02:16:39 -0500
Subject: [PATCH] =?UTF-8?q?Auto-commit:=20Fix=20spinning=20logs=20?=
 =?UTF-8?q?=E2=80=94=20API=20timeouts=20+=20reliable=20Huey=20consumer=20s?=
 =?UTF-8?q?tart?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root causes of indefinite spinning during book create/generate:

1. ai/models.py — ResilientModel.generate_content() had no timeout: a
   stalled Gemini API call would block the thread forever. Now injects
   request_options={"timeout": 180} into every call. Also removed the
   dangerous init_models(force=True) call inside the retry handler, which
   was making a second network call during an existing API failure.

2. ai/setup.py — genai.list_models() calls in get_optimal_model(),
   select_best_models(), and init_models() had no timeout. Added
   request_options={"timeout": 30} to all three calls so model init
   fails fast rather than hanging indefinitely.

3. web/app.py — Huey task consumer only started inside
   `if __name__ == "__main__":`, meaning tasks queued via flask run,
   gunicorn, or other WSGI runners were never executed (status stuck at
   "queued" forever). Moved consumer start to module level with a
   WERKZEUG_RUN_MAIN guard to prevent double-start under the reloader.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 ai/models.py | 21 ++++++++++++---------
 ai/setup.py  |  9 ++++++---
 web/app.py   | 24 ++++++++++++++++++------
 3 files changed, 36 insertions(+), 18 deletions(-)

diff --git a/ai/models.py b/ai/models.py
index 6018f57..7b58a02 100644
--- a/ai/models.py
+++ b/ai/models.py
@@ -47,6 +47,9 @@ class ResilientModel:
 
     _TOKEN_WARN_LIMIT = 30_000
 
+    # Timeout in seconds for all generate_content calls (prevents indefinite hangs)
+    _GENERATION_TIMEOUT = 180
+
     def generate_content(self, *args, **kwargs):
         # Estimate payload size and warn if it exceeds the safe limit
         if args:
@@ -64,22 +67,22 @@ class ResilientModel:
         max_retries = 3
         base_delay = 5
 
+        # Inject timeout into request_options without overwriting caller-supplied values
+        rq_opts = kwargs.pop("request_options", {}) or {}
+        if isinstance(rq_opts, dict):
+            rq_opts.setdefault("timeout", self._GENERATION_TIMEOUT)
+
         while True:
             try:
-                return self.model.generate_content(*args, **kwargs)
+                return self.model.generate_content(*args, **kwargs, request_options=rq_opts)
             except Exception as e:
                 err_str = str(e).lower()
-                is_retryable = "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "deadline" in err_str or "internal error" in err_str
+                is_timeout = "timeout" in err_str or "deadline" in err_str or "timed out" in err_str
+                is_retryable = is_timeout or "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "internal error" in err_str
                 if is_retryable and retries < max_retries:
                     delay = base_delay * (2 ** retries)
-                    utils.log("SYSTEM", f"⚠️ Quota error on {self.role} ({self.name}). Retrying in {delay}s...")
+                    utils.log("SYSTEM", f"⚠️ {'Timeout' if is_timeout else 'API error'} on {self.role} ({self.name}). Retrying in {delay}s... ({retries + 1}/{max_retries})")
                     time.sleep(delay)
-
-                    if retries == 0:
-                        utils.log("SYSTEM", "Attempting to re-optimize models to find alternative...")
-                        from ai import setup as _setup
-                        _setup.init_models(force=True)
-
                     retries += 1
                     continue
                 raise e
diff --git a/ai/setup.py b/ai/setup.py
index ee5ba8a..a46fe35 100644
--- a/ai/setup.py
+++ b/ai/setup.py
@@ -7,9 +7,12 @@ from core import config, utils
 from ai import models
 
 
+_LIST_MODELS_TIMEOUT = {"timeout": 30}
+
+
 def get_optimal_model(base_type="pro"):
     try:
-        available = [m for m in genai.list_models() if 'generateContent' in m.supported_generation_methods]
+        available = [m for m in genai.list_models(request_options=_LIST_MODELS_TIMEOUT) if 'generateContent' in m.supported_generation_methods]
         candidates = [m.name for m in available if base_type in m.name]
         if not candidates: return f"models/gemini-1.5-{base_type}"
 
@@ -56,7 +59,7 @@ def select_best_models(force_refresh=False):
 
     try:
         utils.log("SYSTEM", "Refreshing AI model list from API...")
-        all_models = list(genai.list_models())
+        all_models = list(genai.list_models(request_options=_LIST_MODELS_TIMEOUT))
         raw_model_names = [m.name for m in all_models]
         utils.log("SYSTEM", f"Found {len(all_models)} raw models from Google API.")
 
@@ -155,7 +158,7 @@ def init_models(force=False):
     if not skip_validation:
         utils.log("SYSTEM", "Validating credentials...")
         try:
-            list(genai.list_models(page_size=1))
+            list(genai.list_models(page_size=1, request_options=_LIST_MODELS_TIMEOUT))
             utils.log("SYSTEM", "✅ Gemini API Key is valid.")
         except Exception as e:
             if os.path.exists(cache_path):
diff --git a/web/app.py b/web/app.py
index 304d0b2..f362e56 100644
--- a/web/app.py
+++ b/web/app.py
@@ -90,16 +90,28 @@ with app.app_context():
         print(f"⚠️ System: Failed to clean up stuck runs: {e}")
 
 
-if __name__ == "__main__":
-    import threading
+# --- HUEY CONSUMER ---
+# Start the Huey task consumer in a background thread whenever the app loads.
+# Guard against the Werkzeug reloader spawning a second consumer in the child process,
+# and against test runners or importers that should not start background workers.
+import threading as _threading
 
-    # Start Huey consumer in background thread
-    def run_huey():
+def _start_huey_consumer():
+    try:
         from huey.consumer import Consumer
         consumer = Consumer(huey, workers=1, worker_type='thread', loglevel=20)
+        print("✅ System: Huey task consumer started.")
         consumer.run()
+    except Exception as e:
+        print(f"⚠️ System: Huey consumer failed to start: {e}")
 
-    t = threading.Thread(target=run_huey, daemon=True)
-    t.start()
+_is_reloader_child = os.environ.get('WERKZEUG_RUN_MAIN') == 'true'
+_is_testing = os.environ.get('FLASK_TESTING') == '1'
 
+if not _is_reloader_child and not _is_testing:
+    _huey_thread = _threading.Thread(target=_start_huey_consumer, daemon=True, name="huey-consumer")
+    _huey_thread.start()
+
+
+if __name__ == "__main__":
     app.run(host='0.0.0.0', port=5000, debug=False)