Blueprint v1.0.4: Implemented AI Context Optimization & Token Management

- core/utils.py: Added estimate_tokens(), truncate_to_tokens(), get_ai_cache(), set_ai_cache(), make_cache_key() utilities
- story/writer.py: Applied truncate_to_tokens() to prev_content (2000 tokens) and prev_sum (600 tokens) context injections
- story/editor.py: Applied truncate_to_tokens() to summary (1000 tokens), last_chapter_text (800 tokens), eval text (7500 tokens), and propagation contexts (2500/3000 tokens)
- web/routes/persona.py: Added MD5-keyed in-memory cache for persona analyze endpoint; truncated sample_text to 750 tokens
- ai/models.py: Added pre-dispatch payload size estimation with 30k-token warning threshold

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-20 23:30:39 -05:00
parent f04a241936
commit db70ad81f7
6 changed files with 79 additions and 9 deletions

View File

@@ -112,6 +112,19 @@ def analyze_persona():
data = request.json
sample = data.get('sample_text', '')
# Cache by a hash of the inputs to avoid redundant API calls for unchanged data
cache_key = utils.make_cache_key(
"persona_analyze",
data.get('name', ''),
data.get('age', ''),
data.get('gender', ''),
data.get('nationality', ''),
sample[:500]
)
cached = utils.get_ai_cache(cache_key)
if cached:
return cached
prompt = f"""
ROLE: Literary Analyst
TASK: Create or analyze an Author Persona profile.
@@ -119,7 +132,7 @@ def analyze_persona():
INPUT_DATA:
- NAME: {data.get('name')}
- DEMOGRAPHICS: Age: {data.get('age')} | Gender: {data.get('gender')} | Nationality: {data.get('nationality')}
- SAMPLE_TEXT: {sample[:3000]}
- SAMPLE_TEXT: {utils.truncate_to_tokens(sample, 750)}
INSTRUCTIONS:
1. BIO: Write a 2-3 sentence description of the writing style. If sample is provided, analyze it. If not, invent a style that fits the demographics/name.
@@ -130,6 +143,8 @@ def analyze_persona():
"""
try:
response = ai_models.model_logic.generate_content(prompt)
return json.loads(utils.clean_json(response.text))
result = json.loads(utils.clean_json(response.text))
utils.set_ai_cache(cache_key, result)
return result
except Exception as e:
return {"error": str(e)}, 500