Auto-commit: Fix spinning logs — API timeouts + reliable Huey consumer start

Root causes of indefinite spinning during book create/generate:

1. ai/models.py — ResilientModel.generate_content() had no timeout, so a stalled Gemini API call would block the thread forever. It now injects a default request_options={"timeout": 180} into every call (caller-supplied values are not overwritten). Also removed the dangerous init_models(force=True) call inside the retry handler, which made a second network call while an API failure was already in progress.

2. ai/setup.py — The genai.list_models() calls in get_optimal_model(), select_best_models(), and init_models() had no timeout. Added request_options={"timeout": 30} to all three calls so that model init fails fast rather than hanging indefinitely.

3. web/app.py — The Huey task consumer was only started inside `if __name__ == "__main__":`, so tasks queued via flask run, gunicorn, or other WSGI runners were never executed (status stuck at "queued" forever). Moved the consumer start to module level with a WERKZEUG_RUN_MAIN guard to prevent a double start under the reloader.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-21 02:16:39 -05:00
parent 85f1290f02
commit 1f799227d9
3 changed files with 36 additions and 18 deletions
--- a/ai/models.py
+++ b/ai/models.py
@@ -47,6 +47,9 @@ class ResilientModel:

    _TOKEN_WARN_LIMIT = 30_000

+    # Timeout in seconds for all generate_content calls (prevents indefinite hangs)
+    _GENERATION_TIMEOUT = 180
+
    def generate_content(self, *args, **kwargs):
        # Estimate payload size and warn if it exceeds the safe limit
        if args:
@@ -64,22 +67,22 @@ class ResilientModel:
        max_retries = 3
        base_delay = 5

+        # Inject timeout into request_options without overwriting caller-supplied values
+        rq_opts = kwargs.pop("request_options", {}) or {}
+        if isinstance(rq_opts, dict):
+            rq_opts.setdefault("timeout", self._GENERATION_TIMEOUT)
+
        while True:
            try:
-                return self.model.generate_content(*args, **kwargs)
+                return self.model.generate_content(*args, **kwargs, request_options=rq_opts)
            except Exception as e:
                err_str = str(e).lower()
-                is_retryable = "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "deadline" in err_str or "internal error" in err_str
+                is_timeout = "timeout" in err_str or "deadline" in err_str or "timed out" in err_str
+                is_retryable = is_timeout or "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "internal error" in err_str
                if is_retryable and retries < max_retries:
                    delay = base_delay * (2 ** retries)
-                    utils.log("SYSTEM", f"⚠️ Quota error on {self.role} ({self.name}). Retrying in {delay}s...")
+                    utils.log("SYSTEM", f"⚠️ {'Timeout' if is_timeout else 'API error'} on {self.role} ({self.name}). Retrying in {delay}s... ({retries + 1}/{max_retries})")
                    time.sleep(delay)
-
-                    if retries == 0:
-                        utils.log("SYSTEM", "Attempting to re-optimize models to find alternative...")
-                        from ai import setup as _setup
-                        _setup.init_models(force=True)
-
                    retries += 1
                    continue
                raise e