Files
bookapp/story/style_persona.py
Mike Wichers dc39930da4 feat: Implement ai_blueprint.md Steps 1 & 2 — bible-tracking merge and character voice profiles
Step 1 (Bible-Tracking Merge):
- Added merge_tracking_to_bible() to story/bible_tracker.py — merges character
  tracking state and lore back into bible dict after each chapter, making
  blueprint_initial.json the single persistent source of truth.
- Integrated in cli/engine.py after each chapter's update_tracking + update_lore_index
  calls so the persisted bible is always up-to-date.

Step 2 (Character-Specific Voice Profiles):
- story/writer.py: write_chapter now checks bp['characters'] for a voice_profile on
  the POV character before falling back to the prebuilt_persona cache.
- story/style_persona.py: refine_persona() accepts pov_character=None; when a POV
  character with a voice_profile is supplied it refines that profile's bio instead of
  the global author_details bio.
- cli/engine.py: refine_persona call now passes ch.get('pov_character') so per-chapter
  persona refinement targets the correct voice.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-22 22:45:54 -05:00

306 lines
12 KiB
Python

import json
import os
import time
from core import config, utils
from ai import models as ai_models
def get_style_guidelines():
    """Return the style guideline word lists (banned AI-isms and filter words).

    Loads user overrides from <DATA_DIR>/style_guidelines.json when the file
    exists; otherwise seeds that file with the built-in defaults (best-effort)
    so users can edit it later.

    Returns:
        dict with keys 'ai_isms' and 'filter_words', each a list of strings.
    """
    defaults = {
        "ai_isms": [
            'testament to', 'tapestry', 'shiver down spine', 'unspoken agreement',
            'palpable tension', 'a sense of', 'suddenly', 'in that moment',
            'symphony of', 'dance of', 'azure', 'cerulean',
            'delved', 'mined', 'neon-lit', 'bustling', 'weaved', 'intricately',
            'a reminder that', 'couldn\'t help but', 'it occurred to',
            'the air was thick with', 'etched in', 'a wave of', 'wash of emotion',
            'intertwined', 'navigate', 'realm', 'in the grand scheme',
            'at the end of the day', 'painting a picture', 'a dance between',
            'the weight of', 'visceral reminder', 'stark reminder',
            'a symphony', 'a mosaic', 'rich tapestry', 'whirlwind of',
            'his/her heart raced', 'time seemed to slow', 'the world fell away',
            'needless to say', 'it goes without saying', 'importantly',
            'it is worth noting', 'commendable', 'meticulous', 'pivotal',
            'in conclusion', 'overall', 'in summary', 'to summarize'
        ],
        "filter_words": [
            'felt', 'saw', 'heard', 'realized', 'decided', 'noticed', 'knew', 'thought',
            'wondered', 'seemed', 'appeared', 'looked like', 'watched', 'observed', 'sensed'
        ]
    }
    path = os.path.join(config.DATA_DIR, "style_guidelines.json")
    if os.path.exists(path):
        # Best-effort merge: a corrupt or unreadable overrides file falls back
        # to the defaults instead of crashing the pipeline.
        try:
            user_data = utils.load_json(path)
        except Exception:
            user_data = None
        if user_data:
            if 'ai_isms' in user_data:
                defaults['ai_isms'] = user_data['ai_isms']
            if 'filter_words' in user_data:
                defaults['filter_words'] = user_data['filter_words']
    else:
        # Seed the overrides file with the defaults; ignore write failures
        # (e.g. read-only data dir) since the defaults are returned regardless.
        try:
            with open(path, 'w') as f:
                json.dump(defaults, f, indent=2)
        except OSError:
            pass
    return defaults
def refresh_style_guidelines(model, folder=None):
    """Ask the AI model to revise the banned-word lists and persist the result.

    On success the updated lists are written to style_guidelines.json and
    returned; on any failure the current on-disk/default lists are returned.
    """
    utils.log("SYSTEM", "Refreshing Style Guidelines via AI...")
    existing = get_style_guidelines()
    prompt = f"""
ROLE: Literary Editor
TASK: Update 'Banned Words' lists for AI writing.
INPUT_DATA:
- CURRENT_AI_ISMS: {json.dumps(existing.get('ai_isms', []))}
- CURRENT_FILTER_WORDS: {json.dumps(existing.get('filter_words', []))}
INSTRUCTIONS:
1. Review lists. Remove false positives.
2. Add new common AI tropes (e.g. 'neon-lit', 'bustling', 'a sense of', 'mined', 'delved').
3. Ensure robustness.
OUTPUT_FORMAT (JSON): {{ "ai_isms": [strings], "filter_words": [strings] }}
"""
    try:
        reply = model.generate_content(prompt)
        # Some model wrappers lack a .name attribute; fall back to the
        # configured logic model name for usage accounting.
        label = getattr(model, 'name', ai_models.logic_model_name)
        if folder:
            utils.log_usage(folder, label, reply.usage_metadata)
        parsed = json.loads(utils.clean_json(reply.text))
        if 'ai_isms' in parsed and 'filter_words' in parsed:
            target = os.path.join(config.DATA_DIR, "style_guidelines.json")
            with open(target, 'w') as fh:
                json.dump(parsed, fh, indent=2)
            utils.log("SYSTEM", "Style Guidelines updated.")
            return parsed
    except Exception as e:
        utils.log("SYSTEM", f"Failed to refresh guidelines: {e}")
    return existing
def create_initial_persona(bp, folder):
    """Generate a fictional author persona tailored to the book's metadata.

    Returns a dict (expected keys: name, bio, age, gender per the prompt's
    output format); falls back to a neutral default persona if the model
    call or JSON parsing fails.
    """
    utils.log("SYSTEM", "Generating initial Author Persona based on genre/tone...")
    metadata = bp.get('book_metadata', {})
    style_info = metadata.get('style', {})
    prompt = f"""
ROLE: Creative Director
TASK: Create a fictional 'Author Persona'.
METADATA:
- TITLE: {metadata.get('title')}
- GENRE: {metadata.get('genre')}
- TONE: {style_info.get('tone')}
- AUDIENCE: {metadata.get('target_audience')}
OUTPUT_FORMAT (JSON): {{ "name": "Pen Name", "bio": "Description of writing style (voice, sentence structure, vocabulary)...", "age": "...", "gender": "..." }}
"""
    try:
        reply = ai_models.model_logic.generate_content(prompt)
        utils.log_usage(folder, ai_models.model_logic.name, reply.usage_metadata)
        return json.loads(utils.clean_json(reply.text))
    except Exception as err:
        utils.log("SYSTEM", f"Persona generation failed: {err}")
        return {"name": "AI Author", "bio": "Standard, balanced writing style."}
def validate_persona(bp, persona_details, folder):
    """Validate a freshly created persona via a sample passage plus AI scoring.

    Experiment 6 (Iterative Persona Validation): a ~200-word passage is
    generated in the persona's voice and then scored for voice quality, so
    Phase 3 starts from a well-calibrated author voice.

    Returns:
        (is_valid, score) where is_valid is True when score >= 7. Any model
        failure is treated as acceptance with a score of 7 (fail-open).
    """
    metadata = bp.get('book_metadata', {})
    genre = metadata.get('genre', 'Fiction')
    tone = metadata.get('style', {}).get('tone', 'balanced')
    name = persona_details.get('name', 'Unknown Author')
    bio = persona_details.get('bio', 'Standard style.')
    sample_prompt = f"""
ROLE: Fiction Writer
TASK: Write a 200-word opening scene that perfectly demonstrates this author's voice.
AUTHOR_PERSONA:
Name: {name}
Style/Bio: {bio}
GENRE: {genre}
TONE: {tone}
RULES:
- Exactly ~200 words of prose (no chapter header, no commentary)
- Must reflect the persona's stated sentence structure, vocabulary, and voice
- Show, don't tell — no filter words (felt, saw, heard, realized, noticed)
- Deep POV: immerse the reader in a character's immediate experience
OUTPUT: Prose only.
"""
    try:
        sample_resp = ai_models.model_logic.generate_content(sample_prompt)
        utils.log_usage(folder, ai_models.model_logic.name, sample_resp.usage_metadata)
        sample_text = sample_resp.text
    except Exception as e:
        # Fail-open: a broken model call should not block persona creation.
        utils.log("SYSTEM", f" -> Persona validation sample failed: {e}. Accepting persona.")
        return True, 7
    # Lightweight scoring focused on voice quality (not the full 13-rubric).
    score_prompt = f"""
ROLE: Literary Editor
TASK: Score this prose sample for author voice quality.
EXPECTED_PERSONA:
{bio}
SAMPLE:
{sample_text}
CRITERIA:
1. Does the prose reflect the stated author persona? (voice, register, sentence style)
2. Is the prose free of filter words (felt, saw, heard, noticed, realized)?
3. Is it deep POV — immediate, immersive, not distant narration?
4. Is there genuine sentence variety and strong verb choice?
SCORING (1-10):
- 8-10: Voice is distinct, matches persona, clean deep POV
- 6-7: Reasonable voice, minor filter word issues
- 1-5: Generic AI prose, heavy filter words, or persona not reflected
OUTPUT_FORMAT (JSON): {{"score": int, "reason": "One sentence."}}
"""
    try:
        score_resp = ai_models.model_logic.generate_content(score_prompt)
        utils.log_usage(folder, ai_models.model_logic.name, score_resp.usage_metadata)
        verdict = json.loads(utils.clean_json(score_resp.text))
        score = int(verdict.get('score', 7))
        reason = verdict.get('reason', '')
        is_valid = score >= 7
        label = '✅ Accepted' if is_valid else '❌ Rejected'
        utils.log("SYSTEM", f" -> Persona validation: {score}/10 {label}{reason}")
        return is_valid, score
    except Exception as e:
        # Fail-open on scoring errors as well.
        utils.log("SYSTEM", f" -> Persona scoring failed: {e}. Accepting persona.")
        return True, 7
def refine_persona(bp, text, folder, pov_character=None):
    """Refine a writing-voice bio from a recent text sample.

    When pov_character names an entry in bp['characters'] that carries a
    'voice_profile', that profile's bio is refined in place (Step 2:
    per-character voice profiles) and author_details is returned untouched.
    Otherwise the global author_details bio is refined. Mutates bp.

    Args:
        bp: blueprint dict (book_metadata / characters).
        text: recent prose sample; only the first 3000 chars are sent.
        folder: project folder, used for usage logging.
        pov_character: optional POV character name to target.

    Returns:
        The author_details dict (possibly updated on the default path).
    """
    utils.log("SYSTEM", "Refining Author Persona based on recent chapters...")
    ad = bp.get('book_metadata', {}).get('author_details', {})
    # If a POV character is given and has a voice_profile, refine that instead
    if pov_character:
        for char in bp.get('characters', []):
            if char.get('name') == pov_character and char.get('voice_profile'):
                vp = char['voice_profile']
                current_bio = vp.get('bio', 'Standard style.')
                prompt = f"""
ROLE: Literary Stylist
TASK: Refine a POV character's voice profile based on the text sample.
INPUT_DATA:
- TEXT_SAMPLE: {text[:3000]}
- CHARACTER: {pov_character}
- CURRENT_VOICE_BIO: {current_bio}
GOAL: Ensure future chapters for this POV character sound exactly like the sample. Highlight quirks, patterns, vocabulary specific to this character's perspective.
OUTPUT_FORMAT (JSON): {{ "bio": "Updated voice bio..." }}
"""
                try:
                    response = ai_models.model_logic.generate_content(prompt)
                    utils.log_usage(folder, ai_models.model_logic.name, response.usage_metadata)
                    new_bio = json.loads(utils.clean_json(response.text)).get('bio')
                    if new_bio:
                        char['voice_profile']['bio'] = new_bio
                        utils.log("SYSTEM", f" -> Voice profile bio updated for '{pov_character}'.")
                except Exception as e:
                    utils.log("SYSTEM", f" -> Voice profile refinement failed for '{pov_character}': {e}")
                return ad  # Return author_details unchanged
        # No matching character/voice_profile found: fall through and refine
        # the global author persona instead.
    # Default: refine the main author persona bio
    current_bio = ad.get('bio', 'Standard style.')
    prompt = f"""
ROLE: Literary Stylist
TASK: Refine Author Bio based on text sample.
INPUT_DATA:
- TEXT_SAMPLE: {text[:3000]}
- CURRENT_BIO: {current_bio}
GOAL: Ensure future chapters sound exactly like the sample. Highlight quirks, patterns, vocabulary.
OUTPUT_FORMAT (JSON): {{ "bio": "Updated bio..." }}
"""
    try:
        response = ai_models.model_logic.generate_content(prompt)
        utils.log_usage(folder, ai_models.model_logic.name, response.usage_metadata)
        new_bio = json.loads(utils.clean_json(response.text)).get('bio')
        if new_bio:
            ad['bio'] = new_bio
            utils.log("SYSTEM", " -> Persona bio updated.")
    except Exception as e:
        # Was a bare `except: pass` — log the failure like the POV branch does,
        # but stay best-effort: refinement failure must not abort the run.
        utils.log("SYSTEM", f" -> Persona refinement failed: {e}")
    return ad
def update_persona_sample(bp, folder):
    """Save a manuscript prose sample and update the persona store on disk.

    Writes the first 3000 characters of the concatenated manuscript to
    PERSONAS_DIR as a sample file and records it under the book's author name
    in personas.json. For a first-time author an AI-generated one-line style
    bio is created. No-op when the manuscript is missing, empty, or shorter
    than 500 characters.
    """
    utils.log("SYSTEM", "Extracting author persona from manuscript...")
    ms_path = os.path.join(folder, "manuscript.json")
    if not os.path.exists(ms_path):
        return
    ms = utils.load_json(ms_path)
    if not ms:
        return
    full_text = "\n".join([c.get('content', '') for c in ms])
    if len(full_text) < 500:
        return  # too little prose to characterize a voice
    os.makedirs(config.PERSONAS_DIR, exist_ok=True)  # race-safe vs. exists() check
    meta = bp.get('book_metadata', {})
    safe_title = utils.sanitize_filename(meta.get('title', 'book'))[:20]
    timestamp = int(time.time())
    filename = f"sample_{safe_title}_{timestamp}.txt"
    filepath = os.path.join(config.PERSONAS_DIR, filename)
    sample_text = full_text[:3000]
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(sample_text)
    author_name = meta.get('author', 'Unknown Author')
    # Use a local file mirror for the engine context (runs outside Flask app context)
    _personas_file = os.path.join(config.PERSONAS_DIR, "personas.json")
    personas = {}
    if os.path.exists(_personas_file):
        try:
            with open(_personas_file, 'r', encoding='utf-8') as f:
                personas = json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            # Corrupt or unreadable store: log and rebuild rather than crash.
            utils.log("SYSTEM", f" -> Could not read personas.json ({e}); rebuilding.")
            personas = {}
    if author_name not in personas:
        utils.log("SYSTEM", f"Generating new persona profile for '{author_name}'...")
        prompt = f"""
ROLE: Literary Analyst
TASK: Analyze writing style (Tone, Voice, Vocabulary).
TEXT: {sample_text[:1000]}
OUTPUT: 1-sentence author bio.
"""
        try:
            response = ai_models.model_logic.generate_content(prompt)
            utils.log_usage(folder, ai_models.model_logic.name, response.usage_metadata)
            bio = response.text.strip()
        except Exception as e:
            # Best-effort: a failed analysis still records the sample.
            utils.log("SYSTEM", f" -> Style analysis failed: {e}")
            bio = "Style analysis unavailable."
        personas[author_name] = {
            "name": author_name,
            "bio": bio,
            "sample_files": [filename],
            "sample_text": sample_text[:500]
        }
    else:
        utils.log("SYSTEM", f"Updating persona '{author_name}' with new sample.")
        entry = personas[author_name]
        entry.setdefault('sample_files', [])
        if filename not in entry['sample_files']:
            entry['sample_files'].append(filename)
    with open(_personas_file, 'w', encoding='utf-8') as f:
        json.dump(personas, f, indent=2)