From 1f799227d9faa4a8bdc5e7a6ea4ee94f81ce4aff Mon Sep 17 00:00:00 2001
From: Mike Wichers
Date: Sat, 21 Feb 2026 02:16:39 -0500
Subject: [PATCH] =?UTF-8?q?Auto-commit:=20Fix=20spinning=20logs=20?=
 =?UTF-8?q?=E2=80=94=20API=20timeouts=20+=20reliable=20Huey=20consumer=20s?=
 =?UTF-8?q?tart?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root causes of indefinite spinning during book create/generate:

1. ai/models.py — ResilientModel.generate_content() had no timeout:
   a stalled Gemini API call would block the thread forever.
   Now injects request_options={"timeout": 180} into every call.
   Also removed the dangerous init_models(force=True) call inside the
   retry handler, which was making a second network call during an
   existing API failure.

2. ai/setup.py — genai.list_models() calls in get_optimal_model(),
   select_best_models(), and init_models() had no timeout.
   Added request_options={"timeout": 30} to all three calls so model
   init fails fast rather than hanging indefinitely.

3. web/app.py — Huey task consumer only started inside
   `if __name__ == "__main__":`, meaning tasks queued via flask run,
   gunicorn, or other WSGI runners were never executed (status stuck
   at "queued" forever). Moved consumer start to module level with a
   WERKZEUG_RUN_MAIN guard to prevent double-start under the reloader.

Co-Authored-By: Claude Sonnet 4.6 --- ai/models.py | 21 ++++++++++++--------- ai/setup.py | 9 ++++++--- web/app.py | 24 ++++++++++++++++++------ 3 files changed, 36 insertions(+), 18 deletions(-) diff --git a/ai/models.py b/ai/models.py index 6018f57..7b58a02 100644 --- a/ai/models.py +++ b/ai/models.py @@ -47,6 +47,9 @@ class ResilientModel: _TOKEN_WARN_LIMIT = 30_000 + # Timeout in seconds for all generate_content calls (prevents indefinite hangs) + _GENERATION_TIMEOUT = 180 + def generate_content(self, *args, **kwargs): # Estimate payload size and warn if it exceeds the safe limit if args: @@ -64,22 +67,22 @@ class ResilientModel: max_retries = 3 base_delay = 5 + # Inject timeout into request_options without overwriting caller-supplied values + rq_opts = kwargs.pop("request_options", {}) or {} + if isinstance(rq_opts, dict): + rq_opts.setdefault("timeout", self._GENERATION_TIMEOUT) + while True: try: - return self.model.generate_content(*args, **kwargs) + return self.model.generate_content(*args, **kwargs, request_options=rq_opts) except Exception as e: err_str = str(e).lower() - is_retryable = "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "deadline" in err_str or "internal error" in err_str + is_timeout = "timeout" in err_str or "deadline" in err_str or "timed out" in err_str + is_retryable = is_timeout or "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "internal error" in err_str if is_retryable and retries < max_retries: delay = base_delay * (2 ** retries) - utils.log("SYSTEM", f"⚠️ Quota error on {self.role} ({self.name}). Retrying in {delay}s...") + utils.log("SYSTEM", f"⚠️ {'Timeout' if is_timeout else 'API error'} on {self.role} ({self.name}). Retrying in {delay}s... 
({retries + 1}/{max_retries})") time.sleep(delay) - - if retries == 0: - utils.log("SYSTEM", "Attempting to re-optimize models to find alternative...") - from ai import setup as _setup - _setup.init_models(force=True) - retries += 1 continue raise e diff --git a/ai/setup.py b/ai/setup.py index ee5ba8a..a46fe35 100644 --- a/ai/setup.py +++ b/ai/setup.py @@ -7,9 +7,12 @@ from core import config, utils from ai import models +_LIST_MODELS_TIMEOUT = {"timeout": 30} + + def get_optimal_model(base_type="pro"): try: - available = [m for m in genai.list_models() if 'generateContent' in m.supported_generation_methods] + available = [m for m in genai.list_models(request_options=_LIST_MODELS_TIMEOUT) if 'generateContent' in m.supported_generation_methods] candidates = [m.name for m in available if base_type in m.name] if not candidates: return f"models/gemini-1.5-{base_type}" @@ -56,7 +59,7 @@ def select_best_models(force_refresh=False): try: utils.log("SYSTEM", "Refreshing AI model list from API...") - all_models = list(genai.list_models()) + all_models = list(genai.list_models(request_options=_LIST_MODELS_TIMEOUT)) raw_model_names = [m.name for m in all_models] utils.log("SYSTEM", f"Found {len(all_models)} raw models from Google API.") @@ -155,7 +158,7 @@ def init_models(force=False): if not skip_validation: utils.log("SYSTEM", "Validating credentials...") try: - list(genai.list_models(page_size=1)) + list(genai.list_models(page_size=1, request_options=_LIST_MODELS_TIMEOUT)) utils.log("SYSTEM", "✅ Gemini API Key is valid.") except Exception as e: if os.path.exists(cache_path): diff --git a/web/app.py b/web/app.py index 304d0b2..f362e56 100644 --- a/web/app.py +++ b/web/app.py @@ -90,16 +90,28 @@ with app.app_context(): print(f"⚠️ System: Failed to clean up stuck runs: {e}") -if __name__ == "__main__": - import threading +# --- HUEY CONSUMER --- +# Start the Huey task consumer in a background thread whenever the app loads. 
+# Guard against the Werkzeug reloader spawning a second consumer in the child process, +# and against test runners or importers that should not start background workers. +import threading as _threading - # Start Huey consumer in background thread - def run_huey(): +def _start_huey_consumer(): + try: from huey.consumer import Consumer consumer = Consumer(huey, workers=1, worker_type='thread', loglevel=20) + print("✅ System: Huey task consumer started.") consumer.run() + except Exception as e: + print(f"⚠️ System: Huey consumer failed to start: {e}") - t = threading.Thread(target=run_huey, daemon=True) - t.start() +_is_reloader_child = os.environ.get('WERKZEUG_RUN_MAIN') == 'true' +_is_testing = os.environ.get('FLASK_TESTING') == '1' +if not _is_reloader_child and not _is_testing: + _huey_thread = _threading.Thread(target=_start_huey_consumer, daemon=True, name="huey-consumer") + _huey_thread.start() + + +if __name__ == "__main__": app.run(host='0.0.0.0', port=5000, debug=False)