diff --git a/index.html b/index.html index a527642..013442e 100644 --- a/index.html +++ b/index.html @@ -13,7 +13,7 @@ - +
diff --git a/src/components/plugin-blog/HeroSection.tsx b/src/components/plugin-blog/HeroSection.tsx index d52fa8d..c185283 100644 --- a/src/components/plugin-blog/HeroSection.tsx +++ b/src/components/plugin-blog/HeroSection.tsx @@ -1,4 +1,4 @@ -import { AnimatedSection } from '../common/AnimatedSection'; +import { AnimatedSection } from "../common/AnimatedSection"; export const HeroSection = () => (
@@ -10,7 +10,9 @@ export const HeroSection = () => (

How We Built AI-Powered Policy Analysis

-

The story of turning a general-purpose AI into a policy expert

+

+ The story of turning a general-purpose AI into a policy expert +

@@ -19,17 +21,12 @@ export const HeroSection = () => ( and multiple countries. Our ecosystem spans 40+ repositories, thousands of parameters, and a codebase where a single misread regulation can produce wrong numbers for millions of households. -

-

- - This is the story of how we turned a general-purpose AI into a - domain expert. - + Here’s how we taught Claude to navigate all of it.

See how you can use what we built → diff --git a/src/components/plugin-blog/TimelineSection.tsx b/src/components/plugin-blog/TimelineSection.tsx index c58cd88..885d687 100644 --- a/src/components/plugin-blog/TimelineSection.tsx +++ b/src/components/plugin-blog/TimelineSection.tsx @@ -1,5 +1,5 @@ -import type { ReactNode } from 'react'; -import { AnimatedSection } from '../common/AnimatedSection'; +import type { ReactNode } from "react"; +import { AnimatedSection } from "../common/AnimatedSection"; /* ---------- sub-components ---------- */ @@ -12,7 +12,7 @@ const PipelineStep = ({ }) => ( <> {label} @@ -22,75 +22,30 @@ const PipelineStep = ({ /* ---------- inline data ---------- */ -const ideas = [ +const takeaways = [ { - title: 'Documentation Pointers, Not Stale Examples', - paragraphs: [ - "Most knowledge bases rot. You write example code, the codebase evolves, and now your examples are wrong.", - 'We solved this by having skills point to live code in the active repository instead of hardcoding examples. When Claude reads a skill, it gets instructions like \u201clook at the current implementation in the relevant country model repo\u201d\u2014always fresh, always branch-aware, zero maintenance.', - ], + bold: "Point to live code, not stale examples.", + text: " Skills reference the active repo instead of hardcoding examples\u2014always fresh, zero maintenance.", }, { - title: 'Legal Code Is the Source of Truth', - paragraphs: [ - "When implementing a government benefit program, the temptation is to copy what another jurisdiction does and tweak it. Our agents are instructed differently: read the actual regulation first, understand what the law says, then implement exactly that. 
Pattern-matching across jurisdictions is a tool, not a shortcut.", - ], + bold: "The law is the source of truth.", + text: " Agents read the actual regulation first, not patterns from other jurisdictions.", }, { - title: 'Zero Hard-Coding', - paragraphs: [ - 'Every dollar amount, every threshold, every phase-out rate lives in a parameter file\u2014never as a magic number in code. This is what makes PolicyEngine work: you can simulate policy reforms by changing parameters alone. Our agents enforce this automatically.', - ], + bold: "Zero hard-coding.", + text: " Every dollar amount and threshold lives in a parameter file\u2014agents enforce this automatically.", }, { - title: 'Claude Policing Claude', - paragraphs: [ - "We use Claude Code hooks\u2014prompts that run before or after tool calls\u2014to enforce architectural rules. When Claude writes a file, another Claude prompt checks whether tax logic ended up somewhere it shouldn\u2019t be. If it did, the write gets blocked.", - "The same mechanism auto-detects which PolicyEngine repo you\u2019re in and routes you to the right specialized agents. Open Claude Code in any PolicyEngine repository and it knows which agents to load\u2014country models get the rules-engineer, the API repos get the api-reviewer, the frontend gets the app-reviewer.", - ], - }, -]; - -const lessons = [ - { - bold: 'Structure beats volume.', - text: ' A well-organized 200-line skill file is worth more than a 2,000-line knowledge dump. Claude works best when knowledge is modular and clearly scoped.', - }, - { - bold: 'Agents need constraints, not just capabilities.', - text: " The most impactful additions weren\u2019t new features\u2014they were guardrails. Regulatory checkpoints. Architecture enforcement. The rule that agents must read the law before writing code.", - }, - { - bold: 'Plugins are prompt engineering at scale.', - text: " You\u2019re not training a model. 
You\u2019re building a structured context that makes a general-purpose model behave like a domain expert. That\u2019s powerful and accessible\u2014anyone can do it.", - }, -]; - -const researcherCapabilities = [ - { - title: 'Population-Level Impact Analysis', - paragraphs: [ - 'The analysis-tools plugin turns Claude into a microsimulation analyst. Point it at any tax or benefit reform and it runs population-level analysis using PolicyEngine\u2019s weighted survey data\u2014covering income, demographics, and household structure for the entire US population.', - 'The result: cost estimates, revenue projections, and counts of who wins and who loses under a proposed change\u2014all generated from a plain-English description of a policy reform.', - ], - }, - { - title: 'Distributional and Inequality Analysis', - paragraphs: [ - 'Beyond aggregate numbers, Claude breaks down impacts by income decile, calculates changes to the Gini coefficient, and measures effects on poverty rates. You get the full distributional picture\u2014who bears the cost and who receives the benefit\u2014without writing a single line of analysis code.', - ], + bold: "Claude policing Claude.", + text: " Hooks check every file write against architectural rules and block violations.", }, { - title: 'Congressional District Analysis', - paragraphs: [ - 'Using geographic microdata from HuggingFace datasets, Claude can map reform impacts to every congressional district. This turns abstract national estimates into localized numbers that matter for legislative strategy and constituent communication.', - ], + bold: "Structure beats volume.", + text: " A 200-line skill file outperforms a 2,000-line knowledge dump.", }, { - title: 'Dashboards and Visualizations', - paragraphs: [ - 'Claude doesn\u2019t just compute numbers\u2014it builds interactive tools. Streamlit dashboards, Plotly charts, and household calculators that let stakeholders explore reform scenarios themselves. 
The analysis becomes a shareable, interactive product.', - ], + bold: "Plugins are prompt engineering at scale.", + text: " You\u2019re building structured context, not training a model.", }, ]; @@ -104,54 +59,33 @@ interface Milestone { const milestones: Milestone[] = [ { - date: 'Feb 2025', - title: 'Claude Code first release \u2014 we start using it', + date: "Feb 2025", + title: "Claude Code first release \u2014 we start using it", body: ( <>

Anthropic releases Claude Code, a CLI coding agent. We adopt it immediately and discover it’s remarkably useful for - research—like having the Claude web chat but right in your - terminal. We stop switching to the browser and start doing everything - from the command line. + research—like having the Claude web chat, but right in your terminal. + We slowly migrate to the command line from the browser.

- For coding, the early wins are the repetitive tasks nobody wants to - do by hand: renaming files, updating import paths, bulk reformatting. - Claude handles them instantly. It’s not writing policy logic - yet—but it’s already saving hours a week. + For coding, the early wins are the repetitive tasks: renaming files, + updating import paths, bulk reformatting. Claude handles them + instantly. It’s not writing policy logic yet—but + it’s already saving hours a week.

), }, { - date: 'Oct 2025', - title: 'Claude Code launches with plugin support', - body: ( - <> -

- Anthropic adds an open plugin architecture to Claude Code. Plugins - let you package domain knowledge, custom agents, and automated - workflows into something Claude loads at runtime. No fine-tuning, no - training data. -

-

- We see the potential immediately. PolicyEngine models tax and benefit - policy across 40+ repositories, thousands of parameters, and dozens - of government programs. Every implementation has to match real - legislation. We need an AI that can navigate all of it. -

- - ), - }, - { - date: 'Aug 2025', - title: 'First experiments \u2014 and first failures', + date: "Apr 2025", + title: "First experiments \u2014 and first failures", body: ( <>

We point Claude at our codebase and start asking it to implement - government programs. The results are rough. It hardcodes dollar + government programs. The results are mixed. It hardcodes dollar amounts that should live in parameter files. It mixes up federal and state logic. It skips regulatory sources and guesses at eligibility rules. @@ -160,63 +94,83 @@ const milestones: Milestone[] = [ A typical failure: Claude implements a state TANF program by copying patterns from another state, changing a few numbers, and calling it done. The code compiles, the tests pass—but the income - thresholds are wrong because it never read the actual state - regulation. + thresholds are wrong because it never read the primary sources.

The model is capable. The problem is context. Claude doesn’t - know how PolicyEngine is structured, what our conventions are, or - that the law is the source of truth—not existing code. + know how PolicyEngine is structured, what our conventions are, or that + the law is the source of truth.

), }, { - date: 'Oct 2025', - title: 'Skills and agents take shape', + date: "Aug 2025", + title: "Skills and agents take shape", body: ( <>

We start writing skill files—structured documents Claude reads at runtime. Variable naming conventions. - Parameter file structures. How PolicyEngine’s tax-benefit - logic is organized. How to properly use adds and{' '} - subtracts annotations. Which patterns to follow and - which to avoid. + Parameter file structures. How PolicyEngine’s tax-benefit logic + is organized. How to properly use adds and{" "} + subtracts annotations. Which patterns to follow and which + to avoid.

- Then come specialized agents. A{' '} + Then come specialized agents. A{" "} rules-engineer that knows how to implement tax - variables—it reads the regulation, creates parameter files - with proper metadata, and writes vectorized formulas. A{' '} + variables—it reads the regulation, creates parameter files with + proper metadata, and writes vectorized formulas. A{" "} test-creator that builds integration tests from real - household scenarios. A document-collector that - researches government regulations and extracts the specific - provisions needed before anyone writes a line of code. + household scenarios. A document-collector that researches + government regulations and extracts the specific provisions needed + before anyone writes a line of code.

- The difference is immediate. Claude stops guessing. It reads the - skill file, understands the convention, and follows the pattern. - Error rates on parameter structure drop to near zero. + The difference is immediate. Claude stops guessing. It reads the skill + file, understands the convention, and follows the pattern. Error rates + on parameter structure drop dramatically.

), }, { - date: 'Dec 2025', - title: 'Orchestrated commands chain it all together', + date: "Oct 2025", + title: "Claude Code launches with plugin support", + body: ( + <> +

+ Anthropic adds an open plugin architecture to Claude Code. Plugins let + you package domain knowledge, custom agents, and automated workflows + into something Claude loads at runtime. No fine-tuning, no training + data. +

+

+ This changes everything. The skills and agents we’ve been + building since August can now be packaged as a proper + plugin—portable, versioned, and shareable. We consolidate our + scattered skill files into a single plugin that loads automatically in + any PolicyEngine repository. +

+ + ), + }, + { + date: "Dec 2025", + title: "Orchestrated commands chain it all together", body: ( <>

Individual agents are useful, but the real power comes from chaining - them. We build orchestrated commands—multi-agent - pipelines that run end-to-end workflows.{' '} + them. We build orchestrated commands + —multi-agent pipelines that run end-to-end workflows.{" "} /encode-policy is the flagship: it takes a government program from legal text to working, tested code.

- We also build /review-pr for automated code review,{' '} - /fix-pr to resolve CI failures, and{' '} + We also build /review-pr for automated code review,{" "} + /fix-pr to resolve CI failures, and{" "} /audit-state-tax for tax implementation audits. Each command encodes a workflow our team runs daily—now automated with guardrails built in. @@ -225,39 +179,38 @@ const milestones: Milestone[] = [ ), }, { - date: 'Feb 2026', - title: 'A new level', + date: "Feb 2026", + title: "Agent teams unlock parallel research", body: ( <>

- Opus 4.6 launched with agent teams—multiple agents - collaborating in parallel within a single session. With{' '} - /encode-policy already working well, agent teams - unlocked workflows that weren’t feasible before. The biggest: - backdating historical policy. + Opus 4.6 launches with agent teams—multiple agents collaborating + in parallel within a single session. With /encode-policy{" "} + already working well, agent teams unlock workflows that + weren’t feasible before. The biggest: backdating historical + policy.

- Agent teams solved this by splitting the work: a discovery agent - finds historical PDFs, prep agents download and render them, and - multiple research agents read different documents in - parallel—communicating directly with each other, not through - a central coordinator. This made /backdate-policy{' '} - possible and dramatically expanded what a single session could - accomplish. + Agent teams solve this by splitting the work: a discovery agent finds + historical PDFs, prep agents download and render them, and multiple + research agents read different documents in + parallel—communicating directly with each other, not through a + central coordinator. This makes /backdate-policy possible + and dramatically expands what a single session can accomplish.

), }, { - date: 'Today', - title: 'Still building', + date: "Today", + title: "Still building", body: (

- These numbers keep growing. We’re still building new skills, - agents, and commands every week—and not just for coding - policy. New workflows cover writing policy analysis, building - interactive dashboards, and generating content. The plugin is - expanding beyond implementation into every part of how we work. + We’re still building new skills, agents, and commands every + week—and not just for coding policy. New workflows cover writing + policy analysis, building interactive dashboards, and generating + content. The plugin is expanding beyond implementation into every part + of how we work.

), }, @@ -284,7 +237,9 @@ export const TimelineSection = () => { >
- {i < milestones.length - 1 &&
} + {i < milestones.length - 1 && ( +
+ )}
{m.date} @@ -301,7 +256,7 @@ export const TimelineSection = () => {
-

What We Built

+

The Pipeline

@@ -310,10 +265,10 @@ export const TimelineSection = () => {

- Our most complex workflow chains 6+ agents across - 8 phases—from reading the law to pushing a validated PR.{' '} + Our most complex workflow chains 6+ agents across 8 + phases—from reading the law to pushing a validated PR.{" "} @@ -341,64 +296,23 @@ export const TimelineSection = () => {

- {/* ===== WHAT RESEARCHERS CAN DO ===== */} + {/* ===== TAKEAWAYS ===== */}
-

What Researchers Can Do

-

- Beyond building the plugin, we focused on what policy researchers - actually need from an AI assistant. See our{' '} - - multi-agent AI workflow post - {' '} - for a deep dive into how these capabilities work in practice. -

+

What Made It Work

- {researcherCapabilities.map((cap) => ( - -

{cap.title}

- {cap.paragraphs.map((p, i) => ( -

{p}

- ))} -
- ))} -
-
- - {/* ===== IDEAS + LESSONS ===== */} -
-
-

The Ideas That Made It Work

-
- - {ideas.map((idea) => ( - -

{idea.title}

- {idea.paragraphs.map((p, i) => ( -

{p}

+
+ {takeaways.map((t) => ( +
+ {t.bold} + {t.text} +
))} - - ))} - - -

What We Learned

+
- - {lessons.map((l) => ( - -

- {l.bold} - {l.text} -

-
- ))}
@@ -407,16 +321,23 @@ export const TimelineSection = () => {

Try it yourself

-

- We built this for ourselves and we’re making it public. - See what the plugin can do, or explore the source code. +

+ We built this for ourselves and we’re making it public. See + what the plugin can do, or explore the source code.

-
+
See what it can do diff --git a/src/pages/PluginBlog.css b/src/pages/PluginBlog.css index 17a269a..adefeca 100644 --- a/src/pages/PluginBlog.css +++ b/src/pages/PluginBlog.css @@ -19,7 +19,7 @@ --radius: 8px; --radius-sm: 4px; - font-family: 'Roboto', sans-serif; + font-family: 'Inter', sans-serif; font-size: 1rem; font-weight: 400; line-height: 1.75; @@ -53,7 +53,7 @@ } .pb code { - font-family: 'Roboto Mono', monospace; + font-family: 'JetBrains Mono', monospace; font-size: 0.85em; background: var(--teal-bg); color: var(--teal); @@ -75,7 +75,7 @@ /* ===== Utility ===== */ .pb .spaced-sans { - font-family: 'Roboto', sans-serif; + font-family: 'Inter', sans-serif; font-weight: 500; font-size: 13px; letter-spacing: 2px; @@ -149,7 +149,7 @@ } .pb .hero h1 { - font-family: 'Roboto Serif', serif; + font-family: 'Inter', sans-serif; font-weight: 700; font-size: clamp(2.2rem, 5vw, 3.2rem); line-height: 1.15; @@ -159,7 +159,7 @@ } .pb .hero-subtitle { - font-family: 'Roboto Serif', serif; + font-family: 'Inter', sans-serif; font-weight: 400; font-size: 1.2rem; color: var(--teal); @@ -184,7 +184,7 @@ /* ===== HEADINGS ===== */ .pb h2 { - font-family: 'Roboto Serif', serif; + font-family: 'Inter', sans-serif; font-weight: 700; font-size: clamp(1.7rem, 3.5vw, 2.1rem); color: var(--dark); @@ -293,7 +293,7 @@ } .pb .cap-count { - font-family: 'Roboto', sans-serif; + font-family: 'Inter', sans-serif; font-size: 3rem; font-weight: 700; color: var(--teal-light); @@ -338,7 +338,7 @@ } .pb .pipeline-step { - font-family: 'Roboto Mono', monospace; + font-family: 'JetBrains Mono', monospace; font-size: 0.75rem; padding: 8px 14px; border-radius: var(--radius-sm); @@ -415,7 +415,7 @@ } .pb .timeline-content h3 { - font-family: 'Roboto Serif', serif; + font-family: 'Inter', sans-serif; font-weight: 600; font-size: 1.15rem; color: var(--dark); @@ -497,28 +497,38 @@ margin-bottom: 0; } -/* ===== LESSONS ===== */ -.pb .lesson { - margin-bottom: 20px; - padding: 28px 32px; +/* ===== 
TAKEAWAY CARDS ===== */ +.pb .takeaway-grid { + display: grid; + grid-template-columns: repeat(3, 1fr); + gap: 20px; + margin-top: 8px; +} + +.pb .takeaway-card { + padding: 24px; background: var(--white); - border-left: 3px solid var(--teal); border-radius: var(--radius); + border: 1px solid var(--border); box-shadow: 0 1px 4px rgba(0, 0, 0, 0.04); + transition: box-shadow 0.2s ease; } -.pb .lesson:last-child { - margin-bottom: 0; +.pb .takeaway-card:hover { + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.08); } -.pb .lesson strong { +.pb .takeaway-card strong { + display: block; color: var(--dark); font-weight: 700; + margin-bottom: 6px; } -.pb .lesson p { - color: var(--text); - line-height: 1.75; +.pb .takeaway-card span { + color: var(--text-secondary); + font-size: 0.95rem; + line-height: 1.6; } /* ===== CTA ===== */ @@ -526,7 +536,7 @@ display: inline-flex; align-items: center; justify-content: center; - font-family: 'Roboto', sans-serif; + font-family: 'Inter', sans-serif; font-size: 14px; font-weight: 500; letter-spacing: 2px; @@ -551,7 +561,7 @@ text-align: center; padding: 24px; color: rgba(255, 255, 255, 0.4); - font-family: 'Roboto', sans-serif; + font-family: 'Inter', sans-serif; font-size: 0.8rem; letter-spacing: 1px; } @@ -569,6 +579,10 @@ grid-template-columns: 1fr; gap: 16px; } + + .pb .takeaway-grid { + grid-template-columns: repeat(2, 1fr); + } } @media (max-width: 600px) { @@ -590,7 +604,7 @@ padding: 6px 10px; } - .pb .lesson { - padding: 20px; + .pb .takeaway-grid { + grid-template-columns: 1fr; } } diff --git a/src/pages/PluginBlog.tsx b/src/pages/PluginBlog.tsx index d0f40d4..3445882 100644 --- a/src/pages/PluginBlog.tsx +++ b/src/pages/PluginBlog.tsx @@ -6,6 +6,6 @@ export const PluginBlog = () => (
- +
);