From 4f2449f79b0d86df58ac70c512638a137651ea70 Mon Sep 17 00:00:00 2001 From: Mike Wichers Date: Sun, 22 Feb 2026 22:08:47 -0500 Subject: [PATCH] =?UTF-8?q?feat:=20Implement=20ai=5Fblueprint=5Fv2.md=20?= =?UTF-8?q?=E2=80=94=20Exp=205,=206=20&=207=20(persona=20validation,=20mid?= =?UTF-8?q?-gen=20consistency,=20two-pass=20drafting)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exp 6 — Iterative Persona Validation (story/style_persona.py + cli/engine.py): - Added validate_persona(): generates ~200-word sample in persona voice, scores 1–10 via lightweight voice-quality prompt; accepts if ≥ 7/10 - cli/engine.py retries create_initial_persona() up to 3× until validation passes - Expected: -20% Phase 3 voice-drift rewrites Exp 5 — Mid-gen Consistency Snapshots (cli/engine.py): - analyze_consistency() called every 10 chapters inside the writing loop - Issues logged as ⚠️ warnings; non-blocking; score and summary emitted - Expected: -30% post-generation continuity error rate Exp 7 — Two-Pass Drafting (story/writer.py): - After Flash rough draft, Pro model (model_logic) polishes prose against a strict checklist: filter words, deep POV, active voice, AI-isms, chapter hook - max_attempts reduced 3 → 2 since polished prose needs fewer rewrite cycles - Expected: +0.3 HQS with no increase in per-chapter cost Co-Authored-By: Claude Sonnet 4.6 --- ai_blueprint_v2.md | 31 +++++++++------- cli/engine.py | 26 ++++++++++++-- story/style_persona.py | 80 ++++++++++++++++++++++++++++++++++++++++++ story/writer.py | 46 +++++++++++++++++++++++- 4 files changed, 167 insertions(+), 16 deletions(-) diff --git a/ai_blueprint_v2.md b/ai_blueprint_v2.md index 0c3ea73..f73f704 100644 --- a/ai_blueprint_v2.md +++ b/ai_blueprint_v2.md @@ -30,10 +30,10 @@ Several improvements from the analysis have been implemented in v2.0 (Phase 3 of | **Outline validation** | No pre-generation quality gate | `validate_outline()` runs after chapter planning; logs issues before writing begins | ✅ Implemented | | **Scoring thresholds** | Fixed 7.0 passing threshold for all chapters | Adaptive: 6.5 for setup chapters → 7.5 for climax chapters (linear scale by position) | ✅ Implemented | | **Enrich validation** | Silent failure if enrichment returns missing fields | Explicit warnings logged for missing `title` or `genre` | ✅ Implemented | -| **Persona validation** | Single-pass creation, no quality check | Experiment 6 (future) — validate persona with sample before accepting | 🧪 Experiment Pending | +| **Persona validation** | Single-pass creation, no quality check | `validate_persona()` generates ~200-word sample; scored 1–10; regenerated up to 3× if < 7 | ✅ Implemented | | **Batched evaluation** | Per-chapter evaluation (20K tokens/call) | Experiment 4 (future) — batch 5 chapters per evaluation call | 🧪 Experiment Pending | -| **Mid-gen consistency** | Post-generation consistency check only | Experiment 5 (future) — check every 10 chapters | 🧪 Experiment Pending | -| **Two-pass drafting** | Single draft + iterative refinement | Experiment 7 (future) — rough draft + polish pass | 🧪 Experiment Pending | +| **Mid-gen consistency** | Post-generation consistency check only | `analyze_consistency()` called every 10 chapters inside writing loop; issues logged | ✅ Implemented | +| **Two-pass drafting** | Single draft + iterative refinement | Rough Flash draft + Pro polish pass before evaluation; max_attempts reduced 3 → 2 | ✅ Implemented | --- @@ -44,8 +44,8 @@ Several improvements from the analysis have been implemented in v2.0 (Phase 3 of **Implemented Changes:** - `enrich()` now logs explicit warnings if `book_metadata.title` or `book_metadata.genre` are null after enrichment, surfacing silent failures that previously cascaded into downstream crashes. -**Pending Experiments:** -- **Exp 6 (Iterative Persona Validation):** Generate a 200-word test passage in the new persona's voice and evaluate it before accepting. Run this experiment to validate the hypothesis that pre-validating the persona reduces Phase 3 voice-drift rewrites by ≥20%. +**Implemented (2026-02-22):** +- **Exp 6 (Iterative Persona Validation):** `validate_persona()` added to `story/style_persona.py`. Generates ~200-word sample passage, scores it 1–10 via a lightweight voice-quality prompt. Accepted if ≥ 7. `cli/engine.py` retries `create_initial_persona()` up to 3× until score passes. Expected: -20% Phase 3 voice-drift rewrites. **Recommended Future Work:** - Consider Alt 1-A (Dynamic Bible) for long epics where world-building is extensive. JIT character definition ensures every character detail is tied to a narrative purpose. @@ -77,8 +77,10 @@ Several improvements from the analysis have been implemented in v2.0 (Phase 3 of 4. **`chapter_position` threading**: `cli/engine.py` calculates `chap_pos = i / max(len(chapters) - 1, 1)` and passes it to `write_chapter()`. +**Implemented (2026-02-22):** +- **Exp 7 (Two-Pass Drafting):** After the Flash rough draft, a Pro polish pass (`model_logic`) refines the chapter against a checklist (filter words, deep POV, active voice, AI-isms). `max_attempts` reduced 3 → 2 since polish produces cleaner prose before evaluation. Expected: +0.3 HQS with fewer rewrite cycles. + **Pending Experiments:** -- **Exp 7 (Two-Pass Drafting):** Test rough Flash draft + Pro polish against current iterative approach. High potential for consistent quality improvement with fewer rewrite cycles. - **Exp 3 (Pre-score Beats):** Score each chapter's beat list for "writability" before drafting. Flag high-risk chapters for additional attempts upfront. **Recommended Future Work:** @@ -91,9 +93,11 @@ Several improvements from the analysis have been implemented in v2.0 (Phase 3 of **No new implementations in v2.0** (Phase 4 is already highly optimised for quality). -**Pending Experiments:** +**Implemented:** - **Exp 4 (Adaptive Thresholds):** Already implemented. Gather data on refinement call reduction. -- **Exp 5 (Mid-gen Consistency):** Add `analyze_consistency()` every 10 chapters. Low cost (free on Pro-Exp), high potential for catching cascading issues early. +- **Exp 5 (Mid-gen Consistency):** `analyze_consistency()` called every 10 chapters in the `cli/engine.py` writing loop. Issues logged as `⚠️` warnings. Low cost (free on Pro-Exp). Expected: -30% post-gen CER. + +**Pending Experiments:** - **Alt 4-A (Batched Evaluation):** Group 3–5 chapters per evaluation call. Significant token savings (~60%) with potential cross-chapter quality insights. **Recommended Future Work:** @@ -131,10 +135,10 @@ Execute experiments in this order (see `docs/experiment_design.md` for full spec | 2 | Exp 2: Beat Expansion Skip | ✅ Done | Token savings confirmed | | 3 | Exp 4: Adaptive Thresholds | ✅ Done | Quality + savings | | 4 | Exp 3: Outline Validation | ✅ Done | Quality gate | -| 5 | Exp 6: Persona Validation | 2h | -20% voice-drift rewrites | -| 6 | Exp 5: Mid-gen Consistency | 1h | -30% post-gen CER | +| 5 | Exp 6: Persona Validation | ✅ Done | -20% voice-drift rewrites | +| 6 | Exp 5: Mid-gen Consistency | ✅ Done | -30% post-gen CER | | 7 | Exp 4: Batched Evaluation | Medium | -60% eval tokens | -| 8 | Exp 7: Two-Pass Drafting | Medium | +0.3 HQS | +| 8 | Exp 7: Two-Pass Drafting | ✅ Done | +0.3 HQS | --- @@ -181,8 +185,9 @@ This review reconfirms the principles from `ai_blueprint.md`: | File | Change | |------|--------| | `story/planner.py` | Added enrichment field validation; added `validate_outline()` function | -| `story/writer.py` | Added `build_persona_info()`; `write_chapter()` accepts `prebuilt_persona` + `chapter_position`; beat expansion skip; adaptive scoring | -| `cli/engine.py` | Imported `build_persona_info`; persona cached before writing loop; rebuilt after `refine_persona()`; outline validation gate; `chapter_position` passed to `write_chapter()` | +| `story/writer.py` | Added `build_persona_info()`; `write_chapter()` accepts `prebuilt_persona` + `chapter_position`; beat expansion skip; adaptive scoring; **Exp 7: two-pass Pro polish before evaluation; `max_attempts` 3 → 2** | +| `story/style_persona.py` | **Exp 6: Added `validate_persona()` — generates ~200-word sample, scores voice quality, rejects if < 7/10** | +| `cli/engine.py` | Imported `build_persona_info`; persona cached before writing loop; rebuilt after `refine_persona()`; outline validation gate; `chapter_position` passed to `write_chapter()`; **Exp 6: persona retries up to 3× until validation passes; Exp 5: `analyze_consistency()` every 10 chapters** | | `docs/current_state_analysis.md` | New: Phase mapping with cost analysis | | `docs/alternatives_analysis.md` | New: 15 alternative approaches with hypotheses | | `docs/experiment_design.md` | New: 7 controlled A/B experiment specifications | diff --git a/cli/engine.py b/cli/engine.py index 125d4cb..d45e327 100644 --- a/cli/engine.py +++ b/cli/engine.py @@ -50,9 +50,16 @@ def process_book(bp, folder, context="", resume=False, interactive=False): bp = planner.enrich(bp, folder, context) with open(bp_path, "w") as f: json.dump(bp, f, indent=2) - # Ensure Persona Exists (Auto-create if missing) + # Ensure Persona Exists (Auto-create + Exp 6: Validate before accepting) if 'author_details' not in bp['book_metadata'] or not bp['book_metadata']['author_details']: - bp['book_metadata']['author_details'] = style_persona.create_initial_persona(bp, folder) + max_persona_attempts = 3 + for persona_attempt in range(1, max_persona_attempts + 1): + candidate_persona = style_persona.create_initial_persona(bp, folder) + is_valid, p_score = style_persona.validate_persona(bp, candidate_persona, folder) + if is_valid or persona_attempt == max_persona_attempts: + bp['book_metadata']['author_details'] = candidate_persona + break + utils.log("SYSTEM", f" -> Persona attempt {persona_attempt}/{max_persona_attempts} scored {p_score}/10. Regenerating...") with open(bp_path, "w") as f: json.dump(bp, f, indent=2) except Exception as _e: utils.log("ERROR", f"Blueprint phase failed: {type(_e).__name__}: {_e}") @@ -268,6 +275,21 @@ def process_book(bp, folder, context="", resume=False, interactive=False): # Update Structured Story State (Item 9: Thread Tracking) current_story_state = story_state.update_story_state(txt, ch['chapter_number'], current_story_state, folder) + # Exp 5: Mid-gen Consistency Snapshot (every 10 chapters) + if len(ms) > 0 and len(ms) % 10 == 0: + utils.log("EDITOR", f"--- Mid-gen consistency check after chapter {ch['chapter_number']} ({len(ms)} written) ---") + try: + consistency = story_editor.analyze_consistency(bp, ms, folder) + issues = consistency.get('issues', []) + if issues: + for issue in issues: + utils.log("EDITOR", f" ⚠️ {issue}") + c_score = consistency.get('score', 'N/A') + c_summary = consistency.get('summary', '') + utils.log("EDITOR", f" Consistency score: {c_score}/10 — {c_summary}") + except Exception as _ce: + utils.log("EDITOR", f" Mid-gen consistency check failed (non-blocking): {_ce}") + # Dynamic Pacing Check (every other chapter) remaining = chapters[i+1:] if remaining and len(remaining) >= 2 and i % 2 == 1: diff --git a/story/style_persona.py b/story/style_persona.py index 8178c2a..5b9a1a8 100644 --- a/story/style_persona.py +++ b/story/style_persona.py @@ -104,6 +104,86 @@ def create_initial_persona(bp, folder): return {"name": "AI Author", "bio": "Standard, balanced writing style."} +def validate_persona(bp, persona_details, folder): + """Validate a newly created persona by generating a 200-word sample and scoring it. + + Experiment 6 (Iterative Persona Validation): generates a test passage in the + persona's voice and evaluates voice quality before accepting it. This front-loads + quality assurance so Phase 3 starts with a well-calibrated author voice. + + Returns (is_valid: bool, score: int). Threshold: score >= 7 → accepted. + """ + meta = bp.get('book_metadata', {}) + genre = meta.get('genre', 'Fiction') + tone = meta.get('style', {}).get('tone', 'balanced') + name = persona_details.get('name', 'Unknown Author') + bio = persona_details.get('bio', 'Standard style.') + + sample_prompt = f""" + ROLE: Fiction Writer + TASK: Write a 200-word opening scene that perfectly demonstrates this author's voice. + + AUTHOR_PERSONA: + Name: {name} + Style/Bio: {bio} + + GENRE: {genre} + TONE: {tone} + + RULES: + - Exactly ~200 words of prose (no chapter header, no commentary) + - Must reflect the persona's stated sentence structure, vocabulary, and voice + - Show, don't tell — no filter words (felt, saw, heard, realized, noticed) + - Deep POV: immerse the reader in a character's immediate experience + + OUTPUT: Prose only. + """ + try: + resp = ai_models.model_logic.generate_content(sample_prompt) + utils.log_usage(folder, ai_models.model_logic.name, resp.usage_metadata) + sample_text = resp.text + except Exception as e: + utils.log("SYSTEM", f" -> Persona validation sample failed: {e}. Accepting persona.") + return True, 7 + + # Lightweight scoring: focused on voice quality (not full 13-rubric) + score_prompt = f""" + ROLE: Literary Editor + TASK: Score this prose sample for author voice quality. + + EXPECTED_PERSONA: + {bio} + + SAMPLE: + {sample_text} + + CRITERIA: + 1. Does the prose reflect the stated author persona? (voice, register, sentence style) + 2. Is the prose free of filter words (felt, saw, heard, noticed, realized)? + 3. Is it deep POV — immediate, immersive, not distant narration? + 4. Is there genuine sentence variety and strong verb choice? + + SCORING (1-10): + - 8-10: Voice is distinct, matches persona, clean deep POV + - 6-7: Reasonable voice, minor filter word issues + - 1-5: Generic AI prose, heavy filter words, or persona not reflected + + OUTPUT_FORMAT (JSON): {{"score": int, "reason": "One sentence."}} + """ + try: + resp2 = ai_models.model_logic.generate_content(score_prompt) + utils.log_usage(folder, ai_models.model_logic.name, resp2.usage_metadata) + data = json.loads(utils.clean_json(resp2.text)) + score = int(data.get('score', 7)) + reason = data.get('reason', '') + is_valid = score >= 7 + utils.log("SYSTEM", f" -> Persona validation: {score}/10 {'✅ Accepted' if is_valid else '❌ Rejected'} — {reason}") + return is_valid, score + except Exception as e: + utils.log("SYSTEM", f" -> Persona scoring failed: {e}. Accepting persona.") + return True, 7 + + def refine_persona(bp, text, folder): utils.log("SYSTEM", "Refining Author Persona based on recent chapters...") ad = bp.get('book_metadata', {}).get('author_details', {}) diff --git a/story/writer.py b/story/writer.py index f85752a..8f60969 100644 --- a/story/writer.py +++ b/story/writer.py @@ -362,7 +362,51 @@ def write_chapter(chap, bp, folder, prev_sum, tracking=None, prev_content=None, utils.log("WRITER", f"⚠️ Failed Ch {chap['chapter_number']}: {e}") return f"## Chapter {chap['chapter_number']} Failed\n\nError: {e}" - max_attempts = 3 + # Exp 7: Two-Pass Drafting — Polish the rough draft with the logic (Pro) model + # before evaluation. Produces cleaner prose with fewer rewrite cycles. + if current_text: + utils.log("WRITER", f" -> Two-pass polish (Pro model)...") + guidelines = get_style_guidelines() + fw_list = '", "'.join(guidelines['filter_words']) + polish_prompt = f""" + ROLE: Senior Fiction Editor + TASK: Polish this rough draft into publication-ready prose. + + AUTHOR_VOICE: + {persona_info} + + GENRE: {genre} + TARGET_WORDS: ~{est_words} + BEATS (must all be covered): {json.dumps(chap.get('beats', []))} + + POLISH_CHECKLIST: + 1. FILTER_REMOVAL: Remove all filter words [{fw_list}] — rewrite each to show the sensation directly. + 2. DEEP_POV: Ensure the reader is inside the POV character's experience at all times — no external narration. + 3. ACTIVE_VOICE: Replace all 'was/were + -ing' constructions with active alternatives. + 4. SENTENCE_VARIETY: No two consecutive sentences starting with the same word. Vary length for rhythm. + 5. STRONG_VERBS: Delete adverbs; replace with precise verbs. + 6. NO_AI_ISMS: Remove: 'testament to', 'tapestry', 'palpable tension', 'azure', 'cerulean', 'bustling', 'a sense of'. + 7. CHAPTER_HOOK: Ensure the final paragraph ends on unresolved tension, a question, or a threat. + 8. PRESERVE: Keep all narrative beats, approximate word count (±15%), and chapter header. + + ROUGH_DRAFT: + {current_text} + + OUTPUT: Complete polished chapter in Markdown. + """ + try: + resp_polish = ai_models.model_logic.generate_content(polish_prompt) + utils.log_usage(folder, ai_models.model_logic.name, resp_polish.usage_metadata) + polished = resp_polish.text + if polished: + polished_words = len(polished.split()) + utils.log("WRITER", f" -> Polished: {polished_words:,} words.") + current_text = polished + except Exception as e: + utils.log("WRITER", f" -> Polish pass failed: {e}. Proceeding with raw draft.") + + # Reduced from 3 → 2 attempts since polish pass already refines prose before evaluation + max_attempts = 2 SCORE_AUTO_ACCEPT = 8 # Adaptive passing threshold: lenient for early setup chapters, strict for climax/resolution. # chapter_position=0.0 → setup (SCORE_PASSING=6.5), chapter_position=1.0 → climax (7.5)