Auto-commit: Fix spinning logs — API timeouts + reliable Huey consumer start
Root causes of indefinite spinning during book create/generate:
1. ai/models.py — ResilientModel.generate_content() had no timeout: a
stalled Gemini API call would block the thread forever. Now injects
`request_options={"timeout": 180}` into every call. Also removed the
dangerous `init_models(force=True)` call inside the retry handler, which
was making a second network call during an existing API failure.
2. ai/setup.py — `genai.list_models()` calls in `get_optimal_model()`,
`select_best_models()`, and `init_models()` had no timeout. Added
`request_options={"timeout": 30}` to all three calls so model init
fails fast rather than hanging indefinitely.
3. web/app.py — Huey task consumer only started inside
`if __name__ == "__main__":`, meaning tasks queued via `flask run`,
gunicorn, or other WSGI runners were never executed (status stuck at
"queued" forever). Moved consumer start to module level with a
WERKZEUG_RUN_MAIN guard to prevent double-start under the reloader.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
21
ai/models.py
21
ai/models.py
@@ -47,6 +47,9 @@ class ResilientModel:
|
|||||||
|
|
||||||
_TOKEN_WARN_LIMIT = 30_000
|
_TOKEN_WARN_LIMIT = 30_000
|
||||||
|
|
||||||
|
# Timeout in seconds for all generate_content calls (prevents indefinite hangs)
|
||||||
|
_GENERATION_TIMEOUT = 180
|
||||||
|
|
||||||
def generate_content(self, *args, **kwargs):
|
def generate_content(self, *args, **kwargs):
|
||||||
# Estimate payload size and warn if it exceeds the safe limit
|
# Estimate payload size and warn if it exceeds the safe limit
|
||||||
if args:
|
if args:
|
||||||
@@ -64,22 +67,22 @@ class ResilientModel:
|
|||||||
max_retries = 3
|
max_retries = 3
|
||||||
base_delay = 5
|
base_delay = 5
|
||||||
|
|
||||||
|
# Inject timeout into request_options without overwriting caller-supplied values
|
||||||
|
rq_opts = kwargs.pop("request_options", {}) or {}
|
||||||
|
if isinstance(rq_opts, dict):
|
||||||
|
rq_opts.setdefault("timeout", self._GENERATION_TIMEOUT)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
return self.model.generate_content(*args, **kwargs)
|
return self.model.generate_content(*args, **kwargs, request_options=rq_opts)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
err_str = str(e).lower()
|
err_str = str(e).lower()
|
||||||
is_retryable = "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "deadline" in err_str or "internal error" in err_str
|
is_timeout = "timeout" in err_str or "deadline" in err_str or "timed out" in err_str
|
||||||
|
is_retryable = is_timeout or "429" in err_str or "quota" in err_str or "500" in err_str or "503" in err_str or "504" in err_str or "internal error" in err_str
|
||||||
if is_retryable and retries < max_retries:
|
if is_retryable and retries < max_retries:
|
||||||
delay = base_delay * (2 ** retries)
|
delay = base_delay * (2 ** retries)
|
||||||
utils.log("SYSTEM", f"⚠️ Quota error on {self.role} ({self.name}). Retrying in {delay}s...")
|
utils.log("SYSTEM", f"⚠️ {'Timeout' if is_timeout else 'API error'} on {self.role} ({self.name}). Retrying in {delay}s... ({retries + 1}/{max_retries})")
|
||||||
time.sleep(delay)
|
time.sleep(delay)
|
||||||
|
|
||||||
if retries == 0:
|
|
||||||
utils.log("SYSTEM", "Attempting to re-optimize models to find alternative...")
|
|
||||||
from ai import setup as _setup
|
|
||||||
_setup.init_models(force=True)
|
|
||||||
|
|
||||||
retries += 1
|
retries += 1
|
||||||
continue
|
continue
|
||||||
raise e
|
raise e
|
||||||
|
|||||||
@@ -7,9 +7,12 @@ from core import config, utils
|
|||||||
from ai import models
|
from ai import models
|
||||||
|
|
||||||
|
|
||||||
|
_LIST_MODELS_TIMEOUT = {"timeout": 30}
|
||||||
|
|
||||||
|
|
||||||
def get_optimal_model(base_type="pro"):
|
def get_optimal_model(base_type="pro"):
|
||||||
try:
|
try:
|
||||||
available = [m for m in genai.list_models() if 'generateContent' in m.supported_generation_methods]
|
available = [m for m in genai.list_models(request_options=_LIST_MODELS_TIMEOUT) if 'generateContent' in m.supported_generation_methods]
|
||||||
candidates = [m.name for m in available if base_type in m.name]
|
candidates = [m.name for m in available if base_type in m.name]
|
||||||
if not candidates: return f"models/gemini-1.5-{base_type}"
|
if not candidates: return f"models/gemini-1.5-{base_type}"
|
||||||
|
|
||||||
@@ -56,7 +59,7 @@ def select_best_models(force_refresh=False):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
utils.log("SYSTEM", "Refreshing AI model list from API...")
|
utils.log("SYSTEM", "Refreshing AI model list from API...")
|
||||||
all_models = list(genai.list_models())
|
all_models = list(genai.list_models(request_options=_LIST_MODELS_TIMEOUT))
|
||||||
raw_model_names = [m.name for m in all_models]
|
raw_model_names = [m.name for m in all_models]
|
||||||
utils.log("SYSTEM", f"Found {len(all_models)} raw models from Google API.")
|
utils.log("SYSTEM", f"Found {len(all_models)} raw models from Google API.")
|
||||||
|
|
||||||
@@ -155,7 +158,7 @@ def init_models(force=False):
|
|||||||
if not skip_validation:
|
if not skip_validation:
|
||||||
utils.log("SYSTEM", "Validating credentials...")
|
utils.log("SYSTEM", "Validating credentials...")
|
||||||
try:
|
try:
|
||||||
list(genai.list_models(page_size=1))
|
list(genai.list_models(page_size=1, request_options=_LIST_MODELS_TIMEOUT))
|
||||||
utils.log("SYSTEM", "✅ Gemini API Key is valid.")
|
utils.log("SYSTEM", "✅ Gemini API Key is valid.")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if os.path.exists(cache_path):
|
if os.path.exists(cache_path):
|
||||||
|
|||||||
24
web/app.py
24
web/app.py
@@ -90,16 +90,28 @@ with app.app_context():
|
|||||||
print(f"⚠️ System: Failed to clean up stuck runs: {e}")
|
print(f"⚠️ System: Failed to clean up stuck runs: {e}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
# --- HUEY CONSUMER ---
|
||||||
import threading
|
# Start the Huey task consumer in a background thread whenever the app loads.
|
||||||
|
# Guard against the Werkzeug reloader spawning a second consumer in the child process,
|
||||||
|
# and against test runners or importers that should not start background workers.
|
||||||
|
import threading as _threading
|
||||||
|
|
||||||
# Start Huey consumer in background thread
|
def _start_huey_consumer():
|
||||||
def run_huey():
|
try:
|
||||||
from huey.consumer import Consumer
|
from huey.consumer import Consumer
|
||||||
consumer = Consumer(huey, workers=1, worker_type='thread', loglevel=20)
|
consumer = Consumer(huey, workers=1, worker_type='thread', loglevel=20)
|
||||||
|
print("✅ System: Huey task consumer started.")
|
||||||
consumer.run()
|
consumer.run()
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ System: Huey consumer failed to start: {e}")
|
||||||
|
|
||||||
t = threading.Thread(target=run_huey, daemon=True)
|
_is_reloader_child = os.environ.get('WERKZEUG_RUN_MAIN') == 'true'
|
||||||
t.start()
|
_is_testing = os.environ.get('FLASK_TESTING') == '1'
|
||||||
|
|
||||||
|
if not _is_reloader_child and not _is_testing:
|
||||||
|
_huey_thread = _threading.Thread(target=_start_huey_consumer, daemon=True, name="huey-consumer")
|
||||||
|
_huey_thread.start()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
app.run(host='0.0.0.0', port=5000, debug=False)
|
app.run(host='0.0.0.0', port=5000, debug=False)
|
||||||
|
|||||||
Reference in New Issue
Block a user