Blueprint v1.0.4: Implemented AI Context Optimization & Token Management
- core/utils.py: Added estimate_tokens(), truncate_to_tokens(), get_ai_cache(), set_ai_cache(), make_cache_key() utilities
- story/writer.py: Applied truncate_to_tokens() to prev_content (2000 tokens) and prev_sum (600 tokens) context injections
- story/editor.py: Applied truncate_to_tokens() to summary (1000t), last_chapter_text (800t), eval text (7500t), propagation contexts (2500t/3000t)
- web/routes/persona.py: Added MD5-keyed in-memory cache for persona analyze endpoint; truncated sample_text to 750 tokens
- ai/models.py: Added pre-dispatch payload size estimation with 30k-token warning threshold

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
14
ai/models.py
14
ai/models.py
@@ -45,7 +45,21 @@ class ResilientModel:
|
||||
self.name = name
|
||||
self.model = genai.GenerativeModel(name, safety_settings=self.safety_settings)
|
||||
|
||||
_TOKEN_WARN_LIMIT = 30_000
|
||||
|
||||
def generate_content(self, *args, **kwargs):
|
||||
# Estimate payload size and warn if it exceeds the safe limit
|
||||
if args:
|
||||
payload = args[0]
|
||||
if isinstance(payload, str):
|
||||
est = utils.estimate_tokens(payload)
|
||||
elif isinstance(payload, list):
|
||||
est = sum(utils.estimate_tokens(p) if isinstance(p, str) else 0 for p in payload)
|
||||
else:
|
||||
est = 0
|
||||
if est > self._TOKEN_WARN_LIMIT:
|
||||
utils.log("SYSTEM", f"⚠️ Payload warning: ~{est:,} tokens for {self.role} ({self.name}). Consider reducing context.")
|
||||
|
||||
retries = 0
|
||||
max_retries = 3
|
||||
base_delay = 5
|
||||
|
||||
Reference in New Issue
Block a user