diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..e4c231aa2 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,22 @@ +- bump: minor + changes: + added: + - Census-block-first calibration pipeline (calibration/ package) ported from PR #516 + - Clone-and-assign module for population-weighted census block sampling + - Unified matrix builder with clone-by-clone simulation, COO caching, and target_overview-based querying + - Unified calibration CLI with L0 optimization and seeded takeup re-randomization + - 28 new tests for the calibration pipeline + - Integration test for build_matrix geographic masking (national/state/CD) + - Tests for drop_target_groups utility + - voluntary_filing.yaml takeup rate parameter + changed: + - Rewrote local_area_calibration_setup.ipynb for clone-based pipeline + - Renamed _get_geo_level to get_geo_level (now cross-module public API) + fixed: + - Fix Jupyter import error in unified_calibration.py (OutStream.reconfigure moved to main) + - Fix modal_app/remote_calibration_runner.py referencing deleted fit_calibration_weights.py + - Fix _coo_parts stale state bug on build_matrix re-call after failure + - Remove hardcoded voluntary_filing rate in favor of YAML parameter + removed: + - SparseMatrixBuilder, MatrixTracer, and fit_calibration_weights (replaced by unified pipeline) + - 8 old SparseMatrixBuilder-dependent tests (replaced by new test_calibration suite) diff --git a/docs/calibration_matrix.ipynb b/docs/calibration_matrix.ipynb index b7edbe507..41497b1e8 100644 --- a/docs/calibration_matrix.ipynb +++ b/docs/calibration_matrix.ipynb @@ -6,11 +6,13 @@ "source": [ "# The Calibration Matrix\n", "\n", - "The calibration pipeline has three stages: (1) compute uprated target values ([`hierarchical_uprating.ipynb`](hierarchical_uprating.ipynb)), (2) assemble the sparse constraint matrix (this notebook), and (3) optimize weights 
([`fit_calibration_weights.py`](../policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py)). This notebook is the diagnostic checkpoint between stages 1 and 2 — understand your matrix before you optimize.\n", + "The calibration pipeline has three stages: (1) compute uprated target values, (2) assemble the sparse constraint matrix (this notebook), and (3) optimize weights (`unified_calibration.py`). This notebook is the diagnostic checkpoint between stages 2 and 3 — understand your matrix before you optimize.\n", "\n", - "We build the full calibration matrix using `SparseMatrixBuilder`, then use `MatrixTracer` to inspect its structure: what rows and columns represent, how target groups partition the loss function, and where sparsity patterns emerge.\n", + "We build the full calibration matrix using `UnifiedMatrixBuilder` with clone-based geography from `assign_random_geography`, then inspect its structure: what rows and columns represent, how target groups partition the loss function, and where sparsity patterns emerge.\n", + "\n", + "**Column layout:** `col = clone_idx * n_records + record_idx`\n", + "\n", - "**Requirements:** `policy_data.db` and the stratified CPS h5 file in `STORAGE_FOLDER`." + "**Requirements:** `policy_data.db`, `block_cd_distributions.csv.gz`, and the stratified CPS h5 file in `STORAGE_FOLDER`." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/baogorek/envs/sep/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "from policyengine_us import Microsimulation\n", - "from policyengine_us_data.storage import STORAGE_FOLDER\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import (\n", - " SparseMatrixBuilder,\n", - ")\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n", - " get_all_cds_from_database,\n", - " create_target_groups,\n", - " STATE_CODES,\n", - ")\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.matrix_tracer import (\n", - " MatrixTracer,\n", - ")\n", - "\n", - "db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n", - "db_uri = f\"sqlite:///{db_path}\"\n", - "dataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\"" - ] + "outputs": [], + "source": "import numpy as np\nimport pandas as pd\nfrom policyengine_us import Microsimulation\nfrom policyengine_us_data.storage import STORAGE_FOLDER\nfrom policyengine_us_data.calibration.unified_matrix_builder import (\n UnifiedMatrixBuilder,\n)\nfrom policyengine_us_data.calibration.clone_and_assign import (\n assign_random_geography,\n)\nfrom policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n create_target_groups,\n drop_target_groups,\n get_geo_level,\n STATE_CODES,\n)\n\ndb_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\ndb_uri = f\"sqlite:///{db_path}\"\ndataset_path = STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\"" }, { "cell_type": "code", @@ -65,32 +38,34 @@ "name": "stdout", "output_type": "stream", "text": [ - "Matrix shape: (1411, 5231564)\n", - "Non-zero entries: 2,199,033\n" + "Records: 11,999, Clones: 3, Total columns: 35,997\n", + "Matrix shape: (1411, 35997)\n", + "Non-zero entries: 14,946\n" ] } ], "source": [ "sim = 
Microsimulation(dataset=str(dataset_path))\n", - "cds_to_calibrate = get_all_cds_from_database(db_uri)\n", + "n_records = sim.calculate(\"household_id\", map_to=\"household\").values.shape[0]\n", + "\n", + "N_CLONES = 3 # keep small for diagnostics\n", + "geography = assign_random_geography(n_records, n_clones=N_CLONES, seed=42)\n", "\n", - "builder = SparseMatrixBuilder(\n", + "builder = UnifiedMatrixBuilder(\n", " db_uri=db_uri,\n", " time_period=2024,\n", - " cds_to_calibrate=cds_to_calibrate,\n", " dataset_path=str(dataset_path),\n", ")\n", "\n", - "targets_df, X_sparse, household_id_mapping = builder.build_matrix(\n", + "targets_df, X_sparse, target_names = builder.build_matrix(\n", + " geography,\n", " sim,\n", " target_filter={\"domain_variables\": [\"aca_ptc\", \"snap\"]},\n", " hierarchical_domains=[\"aca_ptc\", \"snap\"],\n", ")\n", "\n", - "tracer = MatrixTracer(\n", - " targets_df, X_sparse, household_id_mapping, cds_to_calibrate, sim\n", - ")\n", - "\n", + "n_total = n_records * N_CLONES\n", + "print(f\"Records: {n_records:,}, Clones: {N_CLONES}, Total columns: {n_total:,}\")\n", "print(f\"Matrix shape: {X_sparse.shape}\")\n", "print(f\"Non-zero entries: {X_sparse.nnz:,}\")" ] @@ -104,91 +79,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "================================================================================\n", - "MATRIX STRUCTURE BREAKDOWN\n", - "================================================================================\n", - "\n", - "Matrix dimensions: 1411 rows x 5231564 columns\n", - " Rows = 1411 targets\n", - " Columns = 11999 households x 436 CDs\n", - " = 11,999 x 436 = 5,231,564\n", - "\n", - "--------------------------------------------------------------------------------\n", - "COLUMN STRUCTURE (Households stacked by CD)\n", - 
"--------------------------------------------------------------------------------\n", - "\n", - "Showing first and last 5 CDs of 436 total:\n", - "\n", - "First 5 CDs:\n", - "cd_geoid start_col end_col n_households\n", - " 1001 0 11998 11999\n", - " 101 11999 23997 11999\n", - " 102 23998 35996 11999\n", - " 103 35997 47995 11999\n", - " 104 47996 59994 11999\n", - "\n", - "Last 5 CDs:\n", - "cd_geoid start_col end_col n_households\n", - " 901 5171569 5183567 11999\n", - " 902 5183568 5195566 11999\n", - " 903 5195567 5207565 11999\n", - " 904 5207566 5219564 11999\n", - " 905 5219565 5231563 11999\n", - "\n", - "--------------------------------------------------------------------------------\n", - "ROW STRUCTURE (Targets)\n", - "--------------------------------------------------------------------------------\n", - "\n", - "Total targets: 1411\n", - "\n", - "Targets by domain variable:\n", - " n_targets n_unique_vars\n", - "domain_variable \n", - "aca_ptc 873 3\n", - "snap 538 2\n", - "\n", - "--------------------------------------------------------------------------------\n", - "TARGET GROUPS (for loss calculation)\n", - "--------------------------------------------------------------------------------\n", - "\n", - "=== Creating Target Groups ===\n", - "\n", - "National targets:\n", - " Group 0: ACA PTC Person Count = 19,743,689\n", - "\n", - "State targets:\n", - " Group 1: SNAP Household Count (51 targets)\n", - " Group 2: Snap (51 targets)\n", - "\n", - "District targets:\n", - " Group 3: Aca Ptc (436 targets)\n", - " Group 4: ACA PTC Tax Unit Count (436 targets)\n", - " Group 5: SNAP Household Count (436 targets)\n", - "\n", - "Total groups created: 6\n", - "========================================\n", - " Group 0: National ACA PTC Person Count (1 target, value=19,743,689) - rows [0]\n", - " Group 1: State SNAP Household Count (51 targets) - rows [1, 2, 3, ..., 50, 51]\n", - " Group 2: State Snap (51 targets) - rows [52, 53, 54, ..., 101, 102]\n", - " Group 3: 
District Aca Ptc (436 targets) - rows [103, 104, 105, ..., 537, 538]\n", - " Group 4: District ACA PTC Tax Unit Count (436 targets) - rows [975, 976, 977, ..., 1409, 1410]\n", - " Group 5: District SNAP Household Count (436 targets) - rows [539, 540, 541, ..., 973, 974]\n", - "\n", - "================================================================================\n" - ] - } - ], - "source": [ - "tracer.print_matrix_structure()" - ] + "outputs": [], + "source": "print(f\"Targets: {X_sparse.shape[0]}\")\nprint(f\"Columns: {X_sparse.shape[1]:,} ({N_CLONES} clones x {n_records:,} records)\")\nprint(f\"Non-zeros: {X_sparse.nnz:,}\")\nprint(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nfor level in [0, 1, 2]:\n n = (geo_levels == level).sum()\n if n > 0:\n print(f\" {level_names[level]}: {n} targets\")" }, { "cell_type": "markdown", @@ -196,7 +90,7 @@ "source": [ "## 3. Anatomy of a row\n", "\n", - "Each row is one calibration target — a known aggregate (dollar total, household count, person count) that the optimizer tries to match. The row vector's non-zero entries identify which (household, CD) pairs can contribute to that target." + "Each row is one calibration target — a known aggregate (dollar total, household count, person count) that the optimizer tries to match. The row vector's non-zero entries identify which cloned records can contribute to that target." 
] }, { @@ -208,23 +102,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "Row 705:\n", - " row_index: 705\n", + "Row 705: cd_3402/household_count/[snap>0]\n", " variable: household_count\n", - " variable_desc: Households represented\n", " geographic_id: 3402\n", - " target_value: 48652.0536866581\n", - " stratum_id: 9625\n", - " domain_variable: snap\n" + " geo_level: district\n", + " target value: 48,652\n", + " uprating_factor: 1.0\n" ] } ], "source": [ "mid_row = X_sparse.shape[0] // 2\n", - "row_info = tracer.get_row_info(mid_row)\n", - "print(f\"Row {mid_row}:\")\n", - "for k, v in row_info.items():\n", - " print(f\" {k}: {v}\")" + "row = targets_df.iloc[mid_row]\n", + "print(f\"Row {mid_row}: {target_names[mid_row]}\")\n", + "print(f\" variable: {row['variable']}\")\n", + "print(f\" geographic_id: {row['geographic_id']}\")\n", + "print(f\" geo_level: {row['geo_level']}\")\n", + "print(f\" target value: {row['value']:,.0f}\")\n", + "print(f\" uprating_factor: {row.get('uprating_factor', 'N/A')}\")" ] }, { @@ -236,21 +131,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Row 705 has 1,841 non-zero columns\n", - "\n", - "First non-zero column (1991877):\n", - " column_index: 1991877\n", - " cd_geoid: 3402\n", - " household_id: 952\n", - " household_index: 43\n", - "\n", - "Last non-zero column (2003831):\n", - " column_index: 2003831\n", + "Row 705 has 9 non-zero columns\n", + " Spans 3 clone(s)\n", + " Spans 9 unique record(s)\n", + "\n", + "First non-zero column (8000):\n", + " clone_idx: 0\n", + " record_idx: 8000\n", + " state_fips: 34\n", " cd_geoid: 3402\n", - " household_id: 177860\n", - " household_index: 11997\n", - "\n", - "Spans 1 CD(s)\n" + " value: 1.00\n" ] } ], @@ -260,19 +150,18 @@ "print(f\"Row {mid_row} has {len(nz_cols):,} non-zero columns\")\n", "\n", "if len(nz_cols) > 0:\n", - " first_col = tracer.get_column_info(nz_cols[0])\n", - " last_col = tracer.get_column_info(nz_cols[-1])\n", - " print(f\"\\nFirst non-zero 
column ({nz_cols[0]}):\")\n", - " for k, v in first_col.items():\n", - " print(f\" {k}: {v}\")\n", - " print(f\"\\nLast non-zero column ({nz_cols[-1]}):\")\n", - " for k, v in last_col.items():\n", - " print(f\" {k}: {v}\")\n", - "\n", - " unique_cds = set(\n", - " tracer.get_column_info(c)[\"cd_geoid\"] for c in nz_cols\n", - " )\n", - " print(f\"\\nSpans {len(unique_cds)} CD(s)\")" + " clone_indices = nz_cols // n_records\n", + " record_indices = nz_cols % n_records\n", + " print(f\" Spans {len(np.unique(clone_indices))} clone(s)\")\n", + " print(f\" Spans {len(np.unique(record_indices))} unique record(s)\")\n", + "\n", + " first_col = nz_cols[0]\n", + " print(f\"\\nFirst non-zero column ({first_col}):\")\n", + " print(f\" clone_idx: {first_col // n_records}\")\n", + " print(f\" record_idx: {first_col % n_records}\")\n", + " print(f\" state_fips: {geography.state_fips[first_col]}\")\n", + " print(f\" cd_geoid: {geography.cd_geoid[first_col]}\")\n", + " print(f\" value: {X_sparse[mid_row, first_col]:.2f}\")" ] }, { @@ -281,9 +170,9 @@ "source": [ "## 4. Anatomy of a column\n", "\n", - "Each column represents one (household, CD) pair. The columns are organized in blocks: the first `n_households` columns belong to CD 1, the next to CD 2, and so on. The block formula is:\n", + "Each column represents one (record, clone) pair. Columns are organized in clone blocks: the first `n_records` columns belong to clone 0, the next to clone 1, and so on. 
The block formula is:\n", "\n", - "$$\\text{column\\_idx} = \\text{cd\\_block} \\times n_{\\text{households}} + \\text{hh\\_index}$$" + "$$\\text{column\\_idx} = \\text{clone\\_idx} \\times n_{\\text{records}} + \\text{record\\_idx}$$" ] }, { @@ -295,22 +184,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "Column 60037:\n", - " column_index: 60037\n", - " cd_geoid: 105\n", - " household_id: 946\n", - " household_index: 42\n", + "Column 12041:\n", + " clone_idx: 1\n", + " record_idx: 42\n", + " state_fips: 45\n", + " cd_geoid: 4507\n", + " block_geoid: 450510801013029\n", "\n", "This column has non-zero values in 0 target rows\n" ] } ], "source": [ - "col_idx = tracer.n_households * 5 + 42\n", - "col_info = tracer.get_column_info(col_idx)\n", + "col_idx = 1 * n_records + 42 # clone 1, record 42\n", + "clone_idx = col_idx // n_records\n", + "record_idx = col_idx % n_records\n", "print(f\"Column {col_idx}:\")\n", - "for k, v in col_info.items():\n", - " print(f\" {k}: {v}\")\n", + "print(f\" clone_idx: {clone_idx}\")\n", + "print(f\" record_idx: {record_idx}\")\n", + "print(f\" state_fips: {geography.state_fips[col_idx]}\")\n", + "print(f\" cd_geoid: {geography.cd_geoid[col_idx]}\")\n", + "print(f\" block_geoid: {geography.block_geoid[col_idx]}\")\n", "\n", "col_vec = X_sparse[:, col_idx]\n", "nz_rows = col_vec.nonzero()[0]\n", @@ -318,10 +212,10 @@ "if len(nz_rows) > 0:\n", " print(\"First 5 target rows:\")\n", " for r in nz_rows[:5]:\n", - " ri = tracer.get_row_info(r)\n", + " row = targets_df.iloc[r]\n", " print(\n", - " f\" row {r}: {ri['variable']} \"\n", - " f\"(geo={ri['geographic_id']}, \"\n", + " f\" row {r}: {row['variable']} \"\n", + " f\"(geo={row['geographic_id']}, \"\n", " f\"val={X_sparse[r, col_idx]:.2f})\"\n", " )" ] @@ -335,16 +229,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Block formula verified: cd_block=5 * n_hh=11999 + hh_idx=42 = 60037\n" + "Block formula verified: clone_idx=1 * n_records=11999 + record_idx=42 = 
12041\n" ] } ], "source": [ - "expected_col = 5 * tracer.n_households + 42\n", + "expected_col = 1 * n_records + 42\n", "assert col_idx == expected_col, f\"{col_idx} != {expected_col}\"\n", "print(\n", " f\"Block formula verified: \"\n", - " f\"cd_block=5 * n_hh={tracer.n_households} + hh_idx=42 = {expected_col}\"\n", + " f\"clone_idx=1 * n_records={n_records} + record_idx=42 = {expected_col}\"\n", ")" ] }, @@ -424,30 +318,30 @@ "text": [ "\n", "--- Group 0: National ACA PTC Person Count (1 target, value=19,743,689) ---\n", - " row_index variable variable_desc geographic_id target_value stratum_id domain_variable\n", - " 0 person_count People represented US 19743689.0 491 aca_ptc\n", + " variable geographic_id value\n", + "person_count US 19743689.0\n", "\n", "--- Group 2: State Snap (51 targets) ---\n", - " row_index variable variable_desc geographic_id target_value stratum_id domain_variable\n", - " 52 snap SNAP allotment 1 1733693703.0 9330 snap\n", - " 53 snap SNAP allotment 10 254854243.0 9337 snap\n", - " 54 snap SNAP allotment 11 319119173.0 9338 snap\n", - " 55 snap SNAP allotment 12 6604797454.0 9339 snap\n", - " 56 snap SNAP allotment 13 3281329856.0 9340 snap\n", - " 57 snap SNAP allotment 15 731331421.0 9341 snap\n", - " 58 snap SNAP allotment 16 281230283.0 9342 snap\n", - " 59 snap SNAP allotment 17 4469341818.0 9343 snap\n", + "variable geographic_id value\n", + " snap 1 1733693703.0\n", + " snap 10 254854243.0\n", + " snap 11 319119173.0\n", + " snap 12 6604797454.0\n", + " snap 13 3281329856.0\n", + " snap 15 731331421.0\n", + " snap 16 281230283.0\n", + " snap 17 4469341818.0\n", "\n", "--- Group 4: District ACA PTC Tax Unit Count (436 targets) ---\n", - " row_index variable variable_desc geographic_id target_value stratum_id domain_variable\n", - " 975 tax_unit_count Tax units represented 1001 25064.255490 21717 aca_ptc\n", - " 976 tax_unit_count Tax units represented 101 9794.081624 21631 aca_ptc\n", - " 977 tax_unit_count Tax units represented 
102 11597.544977 21632 aca_ptc\n", - " 978 tax_unit_count Tax units represented 103 9160.097959 21633 aca_ptc\n", - " 979 tax_unit_count Tax units represented 104 9786.728220 21634 aca_ptc\n", - " 980 tax_unit_count Tax units represented 105 18266.234326 21635 aca_ptc\n", - " 981 tax_unit_count Tax units represented 106 25397.026846 21636 aca_ptc\n", - " 982 tax_unit_count Tax units represented 107 11798.642968 21637 aca_ptc\n" + " variable geographic_id value\n", + "tax_unit_count 1001 25064.255490\n", + "tax_unit_count 101 9794.081624\n", + "tax_unit_count 102 11597.544977\n", + "tax_unit_count 103 9160.097959\n", + "tax_unit_count 104 9786.728220\n", + "tax_unit_count 105 18266.234326\n", + "tax_unit_count 106 25397.026846\n", + "tax_unit_count 107 11798.642968\n" ] } ], @@ -455,18 +349,19 @@ "for gid in [0, 2, 4]:\n", " if gid >= len(group_info):\n", " continue\n", - " rows = tracer.get_group_rows(gid)\n", + " mask = target_groups == gid\n", + " rows = targets_df[mask][[\"variable\", \"geographic_id\", \"value\"]].head(8)\n", " print(f\"\\n--- {group_info[gid]} ---\")\n", - " print(rows.head(8).to_string(index=False))" + " print(rows.to_string(index=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 6. Tracing a household\n", + "## 6. Tracing a household across clones\n", "\n", - "One CPS household appears in every CD block (once per CD = 436 column positions). But most of those columns are zero — the household only contributes where its characteristics match the target constraints." + "One CPS record appears once per clone (N_CLONES column positions). Each clone places it in a different census block/CD/state, so it contributes to different geographic targets depending on the clone." 
] }, { @@ -478,9 +373,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Example SNAP-receiving household: 654\n", + "Example SNAP-receiving household: record index 23\n", "SNAP value: $70\n", - "Column positions across CDs: 436\n" + "\n", + "Column positions across 3 clones:\n", + " col 23: TX (state=48, CD=4829) — 0 non-zero rows\n", + " col 12022: IL (state=17, CD=1708) — 0 non-zero rows\n", + " col 24021: FL (state=12, CD=1220) — 3 non-zero rows\n" ] } ], @@ -488,12 +387,20 @@ "snap_values = sim.calculate(\"snap\", map_to=\"household\").values\n", "hh_ids = sim.calculate(\"household_id\", map_to=\"household\").values\n", "positive_snap = hh_ids[snap_values > 0]\n", - "example_hh = int(positive_snap[0])\n", - "print(f\"Example SNAP-receiving household: {example_hh}\")\n", - "print(f\"SNAP value: ${snap_values[hh_ids == example_hh][0]:,.0f}\")\n", - "\n", - "positions = tracer.get_household_column_positions(example_hh)\n", - "print(f\"Column positions across CDs: {len(positions)}\")" + "example_hh_idx = int(np.where(snap_values > 0)[0][0])\n", + "print(f\"Example SNAP-receiving household: record index {example_hh_idx}\")\n", + "print(f\"SNAP value: ${snap_values[example_hh_idx]:,.0f}\")\n", + "\n", + "clone_cols = [c * n_records + example_hh_idx for c in range(N_CLONES)]\n", + "print(f\"\\nColumn positions across {N_CLONES} clones:\")\n", + "for col in clone_cols:\n", + " state = geography.state_fips[col]\n", + " cd = geography.cd_geoid[col]\n", + " block = geography.block_geoid[col]\n", + " col_vec = X_sparse[:, col]\n", + " nnz = col_vec.nnz\n", + " abbr = STATE_CODES.get(state, \"??\")\n", + " print(f\" col {col}: {abbr} (state={state}, CD={cd}) — {nnz} non-zero rows\")" ] }, { @@ -505,42 +412,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "CDs with non-zero entries: 160\n", - "CDs with all-zero columns: 276\n", "\n", - "Top 10 CDs by activity for household 654:\n", - " CD 1001 (DE): 3 non-zero rows\n", - " CD 1101 (DC): 3 non-zero 
rows\n", - " CD 1201 (FL): 3 non-zero rows\n", - " CD 1202 (FL): 3 non-zero rows\n", - " CD 1203 (FL): 3 non-zero rows\n", - " CD 1204 (FL): 3 non-zero rows\n", - " CD 1205 (FL): 3 non-zero rows\n", - " CD 1206 (FL): 3 non-zero rows\n", - " CD 1207 (FL): 3 non-zero rows\n", - " CD 1208 (FL): 3 non-zero rows\n" + "Clone 2 (col 24021, CD 1220):\n", + " household_count (geo=12): 1.00\n", + " snap (geo=12): 70.08\n", + " household_count (geo=1220): 1.00\n" ] } ], "source": [ - "cd_activity = []\n", - "for cd_geoid, col_pos in positions.items():\n", - " col_vec = X_sparse[:, col_pos]\n", - " nnz = col_vec.nnz\n", - " cd_activity.append({\"cd_geoid\": cd_geoid, \"col_pos\": col_pos, \"nnz\": nnz})\n", - "\n", - "cd_df = pd.DataFrame(cd_activity)\n", - "n_active = (cd_df[\"nnz\"] > 0).sum()\n", - "n_zero = (cd_df[\"nnz\"] == 0).sum()\n", - "print(f\"CDs with non-zero entries: {n_active}\")\n", - "print(f\"CDs with all-zero columns: {n_zero}\")\n", - "\n", - "top10 = cd_df.nlargest(10, \"nnz\")\n", - "print(f\"\\nTop 10 CDs by activity for household {example_hh}:\")\n", - "for _, r in top10.iterrows():\n", - " state_fips = int(r[\"cd_geoid\"]) // 100\n", - " abbr = STATE_CODES.get(state_fips, \"??\")\n", - " print(f\" CD {r['cd_geoid']} ({abbr}): {r['nnz']} non-zero rows\")" + "for col in clone_cols:\n", + " col_vec = X_sparse[:, col]\n", + " nz_rows = col_vec.nonzero()[0]\n", + " if len(nz_rows) == 0:\n", + " continue\n", + " clone_i = col // n_records\n", + " print(f\"\\nClone {clone_i} (col {col}, CD {geography.cd_geoid[col]}):\")\n", + " for r in nz_rows[:5]:\n", + " row = targets_df.iloc[r]\n", + " print(\n", + " f\" {row['variable']} (geo={row['geographic_id']}): \"\n", + " f\"{X_sparse[r, col]:.2f}\"\n", + " )\n", + " if len(nz_rows) > 5:\n", + " print(f\" ... 
and {len(nz_rows) - 5} more\")" ] }, { @@ -559,10 +454,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "Total cells: 7,381,736,804\n", - "Non-zero entries: 2,199,033\n", - "Density: 0.000298\n", - "Sparsity: 99.9702%\n" + "Total cells: 50,791,767\n", + "Non-zero entries: 14,946\n", + "Density: 0.000294\n", + "Sparsity: 99.9706%\n" ] } ], @@ -577,52 +472,10 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Non-zeros per row:\n", - " min: 0\n", - " median: 0\n", - " mean: 1,558\n", - " max: 77,116\n", - "\n", - "By geographic level:\n", - " National : n= 1, median nnz= 0, range=[0, 0]\n", - " State : n= 102, median nnz= 10,423, range=[1,468, 77,116]\n", - " District : n=1308, median nnz= 0, range=[0, 1,988]\n" - ] - } - ], - "source": [ - "nnz_per_row = np.diff(X_sparse.indptr)\n", - "print(f\"Non-zeros per row:\")\n", - "print(f\" min: {nnz_per_row.min():,}\")\n", - "print(f\" median: {int(np.median(nnz_per_row)):,}\")\n", - "print(f\" mean: {nnz_per_row.mean():,.0f}\")\n", - "print(f\" max: {nnz_per_row.max():,}\")\n", - "\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n", - " _get_geo_level,\n", - ")\n", - "\n", - "geo_levels = targets_df[\"geographic_id\"].apply(_get_geo_level)\n", - "level_names = {0: \"National\", 1: \"State\", 2: \"District\"}\n", - "print(\"\\nBy geographic level:\")\n", - "for level in [0, 1, 2]:\n", - " mask = (geo_levels == level).values\n", - " if mask.any():\n", - " vals = nnz_per_row[mask]\n", - " print(\n", - " f\" {level_names[level]:10s}: \"\n", - " f\"n={mask.sum():>4d}, \"\n", - " f\"median nnz={int(np.median(vals)):>7,}, \"\n", - " f\"range=[{vals.min():,}, {vals.max():,}]\"\n", - " )" - ] + "outputs": [], + "source": "nnz_per_row = np.diff(X_sparse.indptr)\nprint(f\"Non-zeros per row:\")\nprint(f\" min: {nnz_per_row.min():,}\")\nprint(f\" 
median: {int(np.median(nnz_per_row)):,}\")\nprint(f\" mean: {nnz_per_row.mean():,.0f}\")\nprint(f\" max: {nnz_per_row.max():,}\")\n\ngeo_levels = targets_df[\"geographic_id\"].apply(get_geo_level)\nlevel_names = {0: \"National\", 1: \"State\", 2: \"District\"}\nprint(\"\\nBy geographic level:\")\nfor level in [0, 1, 2]:\n mask = (geo_levels == level).values\n if mask.any():\n vals = nnz_per_row[mask]\n print(\n f\" {level_names[level]:10s}: \"\n f\"n={mask.sum():>4d}, \"\n f\"median nnz={int(np.median(vals)):>7,}, \"\n f\"range=[{vals.min():,}, {vals.max():,}]\"\n )" }, { "cell_type": "code", @@ -633,38 +486,39 @@ "name": "stdout", "output_type": "stream", "text": [ - "Non-zeros per CD block:\n", - " min: 4,326 (CD 2801)\n", - " median: 4,884\n", - " max: 5,964 (CD 1101)\n" + "Non-zeros per clone block:\n", + " clone nnz unique_states\n", + " 0 4962 50\n", + " 1 4988 50\n", + " 2 4996 50\n" ] } ], "source": [ - "n_hh = tracer.n_households\n", - "n_cds = tracer.n_geographies\n", - "cd_nnz = []\n", - "for cd_idx in range(n_cds):\n", - " block = X_sparse[:, cd_idx * n_hh : (cd_idx + 1) * n_hh]\n", - " cd_nnz.append({\n", - " \"cd_geoid\": cds_to_calibrate[cd_idx],\n", + "clone_nnz = []\n", + "for ci in range(N_CLONES):\n", + " block = X_sparse[:, ci * n_records : (ci + 1) * n_records]\n", + " n_states = len(np.unique(geography.state_fips[ci * n_records : (ci + 1) * n_records]))\n", + " clone_nnz.append({\n", + " \"clone\": ci,\n", " \"nnz\": block.nnz,\n", + " \"unique_states\": n_states,\n", " })\n", "\n", - "cd_nnz_df = pd.DataFrame(cd_nnz)\n", - "print(f\"Non-zeros per CD block:\")\n", - "print(f\" min: {cd_nnz_df['nnz'].min():,} (CD {cd_nnz_df.loc[cd_nnz_df['nnz'].idxmin(), 'cd_geoid']})\")\n", - "print(f\" median: {int(cd_nnz_df['nnz'].median()):,}\")\n", - "print(f\" max: {cd_nnz_df['nnz'].max():,} (CD {cd_nnz_df.loc[cd_nnz_df['nnz'].idxmax(), 'cd_geoid']})\")" + "clone_df = pd.DataFrame(clone_nnz)\n", + "print(\"Non-zeros per clone block:\")\n", + 
"print(clone_df.to_string(index=False))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 8. Group exclusion\n", + "## 8. Dropping target groups\n", + "\n", + "Some target groups are redundant after hierarchical uprating. For example, state-level SNAP Household Count (Group 1) is redundant with district-level SNAP Household Count (Group 5) — the district targets were already reconciled to sum to the state totals.\n", "\n", - "`GROUPS_TO_EXCLUDE` removes redundant or harmful constraints before training. For example, state-level SNAP household counts (Group 1) are redundant with reconciled district rows (Group 4) and can confuse the optimizer. Group IDs depend on database contents, so always check `print_matrix_structure()` output first." + "Specify drops as `(variable_label, geo_level)` pairs. The labels come from the group descriptions above; the geo level is \"National\", \"State\", or \"District\"." ] }, { @@ -676,24 +530,27 @@ "name": "stdout", "output_type": "stream", "text": [ - "Before exclusion: 1411 rows\n", - "Excluding groups [1]: dropping 51 rows\n", - "After exclusion: 1360 rows\n" + "Matrix before: 1411 rows\n", + " DROPPING Group 1: State SNAP Household Count (51 targets) (51 rows)\n", + "\n", + " KEEPING Group 0: National ACA PTC Person Count (1 target, value=19,743,689) (1 rows)\n", + " KEEPING Group 2: State Snap (51 targets) (51 rows)\n", + " KEEPING Group 3: District Aca Ptc (436 targets) (436 rows)\n", + " KEEPING Group 4: District ACA PTC Tax Unit Count (436 targets) (436 rows)\n", + " KEEPING Group 5: District SNAP Household Count (436 targets) (436 rows)\n", + "\n", + "Matrix after: 1360 rows\n" ] } ], "source": [ - "GROUPS_TO_EXCLUDE = [1]\n", - "\n", - "print(f\"Before exclusion: {X_sparse.shape[0]} rows\")\n", + "GROUPS_TO_DROP = [\n", + " (\"SNAP Household Count\", \"State\"),\n", + "]\n", "\n", - "keep_mask = ~np.isin(tracer.target_groups, GROUPS_TO_EXCLUDE)\n", - "n_dropped = (~keep_mask).sum()\n", - "print(f\"Excluding 
groups {GROUPS_TO_EXCLUDE}: dropping {n_dropped} rows\")\n", - "\n", - "X_filtered = X_sparse[keep_mask, :]\n", - "targets_filtered = targets_df[keep_mask].reset_index(drop=True)\n", - "print(f\"After exclusion: {X_filtered.shape[0]} rows\")" + "targets_filtered, X_filtered = drop_target_groups(\n", + " targets_df, X_sparse, target_groups, group_info, GROUPS_TO_DROP\n", + ")" ] }, { @@ -756,883 +613,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "Achievable targets: 487\n", - "Impossible targets: 873\n", - "\n", - "Impossible targets:\n", - " aca_ptc/person_count (geo=US)\n", - " aca_ptc/aca_ptc (geo=1001)\n", - " aca_ptc/aca_ptc (geo=101)\n", - " aca_ptc/aca_ptc (geo=102)\n", - " aca_ptc/aca_ptc (geo=103)\n", - " aca_ptc/aca_ptc (geo=104)\n", - " aca_ptc/aca_ptc (geo=105)\n", - " aca_ptc/aca_ptc (geo=106)\n", - " aca_ptc/aca_ptc (geo=107)\n", - " aca_ptc/aca_ptc (geo=1101)\n", - " aca_ptc/aca_ptc (geo=1201)\n", - " aca_ptc/aca_ptc (geo=1202)\n", - " aca_ptc/aca_ptc (geo=1203)\n", - " aca_ptc/aca_ptc (geo=1204)\n", - " aca_ptc/aca_ptc (geo=1205)\n", - " aca_ptc/aca_ptc (geo=1206)\n", - " aca_ptc/aca_ptc (geo=1207)\n", - " aca_ptc/aca_ptc (geo=1208)\n", - " aca_ptc/aca_ptc (geo=1209)\n", - " aca_ptc/aca_ptc (geo=1210)\n", - " aca_ptc/aca_ptc (geo=1211)\n", - " aca_ptc/aca_ptc (geo=1212)\n", - " aca_ptc/aca_ptc (geo=1213)\n", - " aca_ptc/aca_ptc (geo=1214)\n", - " aca_ptc/aca_ptc (geo=1215)\n", - " aca_ptc/aca_ptc (geo=1216)\n", - " aca_ptc/aca_ptc (geo=1217)\n", - " aca_ptc/aca_ptc (geo=1218)\n", - " aca_ptc/aca_ptc (geo=1219)\n", - " aca_ptc/aca_ptc (geo=1220)\n", - " aca_ptc/aca_ptc (geo=1221)\n", - " aca_ptc/aca_ptc (geo=1222)\n", - " aca_ptc/aca_ptc (geo=1223)\n", - " aca_ptc/aca_ptc (geo=1224)\n", - " aca_ptc/aca_ptc (geo=1225)\n", - " aca_ptc/aca_ptc (geo=1226)\n", - " aca_ptc/aca_ptc (geo=1227)\n", - " aca_ptc/aca_ptc (geo=1228)\n", - " aca_ptc/aca_ptc (geo=1301)\n", - " aca_ptc/aca_ptc (geo=1302)\n", - " aca_ptc/aca_ptc (geo=1303)\n", - " 
aca_ptc/aca_ptc (geo=1304)\n", - " aca_ptc/aca_ptc (geo=1305)\n", - " aca_ptc/aca_ptc (geo=1306)\n", - " aca_ptc/aca_ptc (geo=1307)\n", - " aca_ptc/aca_ptc (geo=1308)\n", - " aca_ptc/aca_ptc (geo=1309)\n", - " aca_ptc/aca_ptc (geo=1310)\n", - " aca_ptc/aca_ptc (geo=1311)\n", - " aca_ptc/aca_ptc (geo=1312)\n", - " aca_ptc/aca_ptc (geo=1313)\n", - " aca_ptc/aca_ptc (geo=1314)\n", - " aca_ptc/aca_ptc (geo=1501)\n", - " aca_ptc/aca_ptc (geo=1502)\n", - " aca_ptc/aca_ptc (geo=1601)\n", - " aca_ptc/aca_ptc (geo=1602)\n", - " aca_ptc/aca_ptc (geo=1701)\n", - " aca_ptc/aca_ptc (geo=1702)\n", - " aca_ptc/aca_ptc (geo=1703)\n", - " aca_ptc/aca_ptc (geo=1704)\n", - " aca_ptc/aca_ptc (geo=1705)\n", - " aca_ptc/aca_ptc (geo=1706)\n", - " aca_ptc/aca_ptc (geo=1707)\n", - " aca_ptc/aca_ptc (geo=1708)\n", - " aca_ptc/aca_ptc (geo=1709)\n", - " aca_ptc/aca_ptc (geo=1710)\n", - " aca_ptc/aca_ptc (geo=1711)\n", - " aca_ptc/aca_ptc (geo=1712)\n", - " aca_ptc/aca_ptc (geo=1713)\n", - " aca_ptc/aca_ptc (geo=1714)\n", - " aca_ptc/aca_ptc (geo=1715)\n", - " aca_ptc/aca_ptc (geo=1716)\n", - " aca_ptc/aca_ptc (geo=1717)\n", - " aca_ptc/aca_ptc (geo=1801)\n", - " aca_ptc/aca_ptc (geo=1802)\n", - " aca_ptc/aca_ptc (geo=1803)\n", - " aca_ptc/aca_ptc (geo=1804)\n", - " aca_ptc/aca_ptc (geo=1805)\n", - " aca_ptc/aca_ptc (geo=1806)\n", - " aca_ptc/aca_ptc (geo=1807)\n", - " aca_ptc/aca_ptc (geo=1808)\n", - " aca_ptc/aca_ptc (geo=1809)\n", - " aca_ptc/aca_ptc (geo=1901)\n", - " aca_ptc/aca_ptc (geo=1902)\n", - " aca_ptc/aca_ptc (geo=1903)\n", - " aca_ptc/aca_ptc (geo=1904)\n", - " aca_ptc/aca_ptc (geo=2001)\n", - " aca_ptc/aca_ptc (geo=2002)\n", - " aca_ptc/aca_ptc (geo=2003)\n", - " aca_ptc/aca_ptc (geo=2004)\n", - " aca_ptc/aca_ptc (geo=201)\n", - " aca_ptc/aca_ptc (geo=2101)\n", - " aca_ptc/aca_ptc (geo=2102)\n", - " aca_ptc/aca_ptc (geo=2103)\n", - " aca_ptc/aca_ptc (geo=2104)\n", - " aca_ptc/aca_ptc (geo=2105)\n", - " aca_ptc/aca_ptc (geo=2106)\n", - " aca_ptc/aca_ptc (geo=2201)\n", - " 
aca_ptc/aca_ptc (geo=2202)\n", - " aca_ptc/aca_ptc (geo=2203)\n", - " aca_ptc/aca_ptc (geo=2204)\n", - " aca_ptc/aca_ptc (geo=2205)\n", - " aca_ptc/aca_ptc (geo=2206)\n", - " aca_ptc/aca_ptc (geo=2301)\n", - " aca_ptc/aca_ptc (geo=2302)\n", - " aca_ptc/aca_ptc (geo=2401)\n", - " aca_ptc/aca_ptc (geo=2402)\n", - " aca_ptc/aca_ptc (geo=2403)\n", - " aca_ptc/aca_ptc (geo=2404)\n", - " aca_ptc/aca_ptc (geo=2405)\n", - " aca_ptc/aca_ptc (geo=2406)\n", - " aca_ptc/aca_ptc (geo=2407)\n", - " aca_ptc/aca_ptc (geo=2408)\n", - " aca_ptc/aca_ptc (geo=2501)\n", - " aca_ptc/aca_ptc (geo=2502)\n", - " aca_ptc/aca_ptc (geo=2503)\n", - " aca_ptc/aca_ptc (geo=2504)\n", - " aca_ptc/aca_ptc (geo=2505)\n", - " aca_ptc/aca_ptc (geo=2506)\n", - " aca_ptc/aca_ptc (geo=2507)\n", - " aca_ptc/aca_ptc (geo=2508)\n", - " aca_ptc/aca_ptc (geo=2509)\n", - " aca_ptc/aca_ptc (geo=2601)\n", - " aca_ptc/aca_ptc (geo=2602)\n", - " aca_ptc/aca_ptc (geo=2603)\n", - " aca_ptc/aca_ptc (geo=2604)\n", - " aca_ptc/aca_ptc (geo=2605)\n", - " aca_ptc/aca_ptc (geo=2606)\n", - " aca_ptc/aca_ptc (geo=2607)\n", - " aca_ptc/aca_ptc (geo=2608)\n", - " aca_ptc/aca_ptc (geo=2609)\n", - " aca_ptc/aca_ptc (geo=2610)\n", - " aca_ptc/aca_ptc (geo=2611)\n", - " aca_ptc/aca_ptc (geo=2612)\n", - " aca_ptc/aca_ptc (geo=2613)\n", - " aca_ptc/aca_ptc (geo=2701)\n", - " aca_ptc/aca_ptc (geo=2702)\n", - " aca_ptc/aca_ptc (geo=2703)\n", - " aca_ptc/aca_ptc (geo=2704)\n", - " aca_ptc/aca_ptc (geo=2705)\n", - " aca_ptc/aca_ptc (geo=2706)\n", - " aca_ptc/aca_ptc (geo=2707)\n", - " aca_ptc/aca_ptc (geo=2708)\n", - " aca_ptc/aca_ptc (geo=2801)\n", - " aca_ptc/aca_ptc (geo=2802)\n", - " aca_ptc/aca_ptc (geo=2803)\n", - " aca_ptc/aca_ptc (geo=2804)\n", - " aca_ptc/aca_ptc (geo=2901)\n", - " aca_ptc/aca_ptc (geo=2902)\n", - " aca_ptc/aca_ptc (geo=2903)\n", - " aca_ptc/aca_ptc (geo=2904)\n", - " aca_ptc/aca_ptc (geo=2905)\n", - " aca_ptc/aca_ptc (geo=2906)\n", - " aca_ptc/aca_ptc (geo=2907)\n", - " aca_ptc/aca_ptc (geo=2908)\n", - " 
aca_ptc/aca_ptc (geo=3001)\n", - " aca_ptc/aca_ptc (geo=3002)\n", - " aca_ptc/aca_ptc (geo=3101)\n", - " aca_ptc/aca_ptc (geo=3102)\n", - " aca_ptc/aca_ptc (geo=3103)\n", - " aca_ptc/aca_ptc (geo=3201)\n", - " aca_ptc/aca_ptc (geo=3202)\n", - " aca_ptc/aca_ptc (geo=3203)\n", - " aca_ptc/aca_ptc (geo=3204)\n", - " aca_ptc/aca_ptc (geo=3301)\n", - " aca_ptc/aca_ptc (geo=3302)\n", - " aca_ptc/aca_ptc (geo=3401)\n", - " aca_ptc/aca_ptc (geo=3402)\n", - " aca_ptc/aca_ptc (geo=3403)\n", - " aca_ptc/aca_ptc (geo=3404)\n", - " aca_ptc/aca_ptc (geo=3405)\n", - " aca_ptc/aca_ptc (geo=3406)\n", - " aca_ptc/aca_ptc (geo=3407)\n", - " aca_ptc/aca_ptc (geo=3408)\n", - " aca_ptc/aca_ptc (geo=3409)\n", - " aca_ptc/aca_ptc (geo=3410)\n", - " aca_ptc/aca_ptc (geo=3411)\n", - " aca_ptc/aca_ptc (geo=3412)\n", - " aca_ptc/aca_ptc (geo=3501)\n", - " aca_ptc/aca_ptc (geo=3502)\n", - " aca_ptc/aca_ptc (geo=3503)\n", - " aca_ptc/aca_ptc (geo=3601)\n", - " aca_ptc/aca_ptc (geo=3602)\n", - " aca_ptc/aca_ptc (geo=3603)\n", - " aca_ptc/aca_ptc (geo=3604)\n", - " aca_ptc/aca_ptc (geo=3605)\n", - " aca_ptc/aca_ptc (geo=3606)\n", - " aca_ptc/aca_ptc (geo=3607)\n", - " aca_ptc/aca_ptc (geo=3608)\n", - " aca_ptc/aca_ptc (geo=3609)\n", - " aca_ptc/aca_ptc (geo=3610)\n", - " aca_ptc/aca_ptc (geo=3611)\n", - " aca_ptc/aca_ptc (geo=3612)\n", - " aca_ptc/aca_ptc (geo=3613)\n", - " aca_ptc/aca_ptc (geo=3614)\n", - " aca_ptc/aca_ptc (geo=3615)\n", - " aca_ptc/aca_ptc (geo=3616)\n", - " aca_ptc/aca_ptc (geo=3617)\n", - " aca_ptc/aca_ptc (geo=3618)\n", - " aca_ptc/aca_ptc (geo=3619)\n", - " aca_ptc/aca_ptc (geo=3620)\n", - " aca_ptc/aca_ptc (geo=3621)\n", - " aca_ptc/aca_ptc (geo=3622)\n", - " aca_ptc/aca_ptc (geo=3623)\n", - " aca_ptc/aca_ptc (geo=3624)\n", - " aca_ptc/aca_ptc (geo=3625)\n", - " aca_ptc/aca_ptc (geo=3626)\n", - " aca_ptc/aca_ptc (geo=3701)\n", - " aca_ptc/aca_ptc (geo=3702)\n", - " aca_ptc/aca_ptc (geo=3703)\n", - " aca_ptc/aca_ptc (geo=3704)\n", - " aca_ptc/aca_ptc (geo=3705)\n", - " 
aca_ptc/aca_ptc (geo=3706)\n", - " aca_ptc/aca_ptc (geo=3707)\n", - " aca_ptc/aca_ptc (geo=3708)\n", - " aca_ptc/aca_ptc (geo=3709)\n", - " aca_ptc/aca_ptc (geo=3710)\n", - " aca_ptc/aca_ptc (geo=3711)\n", - " aca_ptc/aca_ptc (geo=3712)\n", - " aca_ptc/aca_ptc (geo=3713)\n", - " aca_ptc/aca_ptc (geo=3714)\n", - " aca_ptc/aca_ptc (geo=3801)\n", - " aca_ptc/aca_ptc (geo=3901)\n", - " aca_ptc/aca_ptc (geo=3902)\n", - " aca_ptc/aca_ptc (geo=3903)\n", - " aca_ptc/aca_ptc (geo=3904)\n", - " aca_ptc/aca_ptc (geo=3905)\n", - " aca_ptc/aca_ptc (geo=3906)\n", - " aca_ptc/aca_ptc (geo=3907)\n", - " aca_ptc/aca_ptc (geo=3908)\n", - " aca_ptc/aca_ptc (geo=3909)\n", - " aca_ptc/aca_ptc (geo=3910)\n", - " aca_ptc/aca_ptc (geo=3911)\n", - " aca_ptc/aca_ptc (geo=3912)\n", - " aca_ptc/aca_ptc (geo=3913)\n", - " aca_ptc/aca_ptc (geo=3914)\n", - " aca_ptc/aca_ptc (geo=3915)\n", - " aca_ptc/aca_ptc (geo=4001)\n", - " aca_ptc/aca_ptc (geo=4002)\n", - " aca_ptc/aca_ptc (geo=4003)\n", - " aca_ptc/aca_ptc (geo=4004)\n", - " aca_ptc/aca_ptc (geo=4005)\n", - " aca_ptc/aca_ptc (geo=401)\n", - " aca_ptc/aca_ptc (geo=402)\n", - " aca_ptc/aca_ptc (geo=403)\n", - " aca_ptc/aca_ptc (geo=404)\n", - " aca_ptc/aca_ptc (geo=405)\n", - " aca_ptc/aca_ptc (geo=406)\n", - " aca_ptc/aca_ptc (geo=407)\n", - " aca_ptc/aca_ptc (geo=408)\n", - " aca_ptc/aca_ptc (geo=409)\n", - " aca_ptc/aca_ptc (geo=4101)\n", - " aca_ptc/aca_ptc (geo=4102)\n", - " aca_ptc/aca_ptc (geo=4103)\n", - " aca_ptc/aca_ptc (geo=4104)\n", - " aca_ptc/aca_ptc (geo=4105)\n", - " aca_ptc/aca_ptc (geo=4106)\n", - " aca_ptc/aca_ptc (geo=4201)\n", - " aca_ptc/aca_ptc (geo=4202)\n", - " aca_ptc/aca_ptc (geo=4203)\n", - " aca_ptc/aca_ptc (geo=4204)\n", - " aca_ptc/aca_ptc (geo=4205)\n", - " aca_ptc/aca_ptc (geo=4206)\n", - " aca_ptc/aca_ptc (geo=4207)\n", - " aca_ptc/aca_ptc (geo=4208)\n", - " aca_ptc/aca_ptc (geo=4209)\n", - " aca_ptc/aca_ptc (geo=4210)\n", - " aca_ptc/aca_ptc (geo=4211)\n", - " aca_ptc/aca_ptc (geo=4212)\n", - " 
aca_ptc/aca_ptc (geo=4213)\n", - " aca_ptc/aca_ptc (geo=4214)\n", - " aca_ptc/aca_ptc (geo=4215)\n", - " aca_ptc/aca_ptc (geo=4216)\n", - " aca_ptc/aca_ptc (geo=4217)\n", - " aca_ptc/aca_ptc (geo=4401)\n", - " aca_ptc/aca_ptc (geo=4402)\n", - " aca_ptc/aca_ptc (geo=4501)\n", - " aca_ptc/aca_ptc (geo=4502)\n", - " aca_ptc/aca_ptc (geo=4503)\n", - " aca_ptc/aca_ptc (geo=4504)\n", - " aca_ptc/aca_ptc (geo=4505)\n", - " aca_ptc/aca_ptc (geo=4506)\n", - " aca_ptc/aca_ptc (geo=4507)\n", - " aca_ptc/aca_ptc (geo=4601)\n", - " aca_ptc/aca_ptc (geo=4701)\n", - " aca_ptc/aca_ptc (geo=4702)\n", - " aca_ptc/aca_ptc (geo=4703)\n", - " aca_ptc/aca_ptc (geo=4704)\n", - " aca_ptc/aca_ptc (geo=4705)\n", - " aca_ptc/aca_ptc (geo=4706)\n", - " aca_ptc/aca_ptc (geo=4707)\n", - " aca_ptc/aca_ptc (geo=4708)\n", - " aca_ptc/aca_ptc (geo=4709)\n", - " aca_ptc/aca_ptc (geo=4801)\n", - " aca_ptc/aca_ptc (geo=4802)\n", - " aca_ptc/aca_ptc (geo=4803)\n", - " aca_ptc/aca_ptc (geo=4804)\n", - " aca_ptc/aca_ptc (geo=4805)\n", - " aca_ptc/aca_ptc (geo=4806)\n", - " aca_ptc/aca_ptc (geo=4807)\n", - " aca_ptc/aca_ptc (geo=4808)\n", - " aca_ptc/aca_ptc (geo=4809)\n", - " aca_ptc/aca_ptc (geo=4810)\n", - " aca_ptc/aca_ptc (geo=4811)\n", - " aca_ptc/aca_ptc (geo=4812)\n", - " aca_ptc/aca_ptc (geo=4813)\n", - " aca_ptc/aca_ptc (geo=4814)\n", - " aca_ptc/aca_ptc (geo=4815)\n", - " aca_ptc/aca_ptc (geo=4816)\n", - " aca_ptc/aca_ptc (geo=4817)\n", - " aca_ptc/aca_ptc (geo=4818)\n", - " aca_ptc/aca_ptc (geo=4819)\n", - " aca_ptc/aca_ptc (geo=4820)\n", - " aca_ptc/aca_ptc (geo=4821)\n", - " aca_ptc/aca_ptc (geo=4822)\n", - " aca_ptc/aca_ptc (geo=4823)\n", - " aca_ptc/aca_ptc (geo=4824)\n", - " aca_ptc/aca_ptc (geo=4825)\n", - " aca_ptc/aca_ptc (geo=4826)\n", - " aca_ptc/aca_ptc (geo=4827)\n", - " aca_ptc/aca_ptc (geo=4828)\n", - " aca_ptc/aca_ptc (geo=4829)\n", - " aca_ptc/aca_ptc (geo=4830)\n", - " aca_ptc/aca_ptc (geo=4831)\n", - " aca_ptc/aca_ptc (geo=4832)\n", - " aca_ptc/aca_ptc (geo=4833)\n", - " 
aca_ptc/aca_ptc (geo=4834)\n", - " aca_ptc/aca_ptc (geo=4835)\n", - " aca_ptc/aca_ptc (geo=4836)\n", - " aca_ptc/aca_ptc (geo=4837)\n", - " aca_ptc/aca_ptc (geo=4838)\n", - " aca_ptc/aca_ptc (geo=4901)\n", - " aca_ptc/aca_ptc (geo=4902)\n", - " aca_ptc/aca_ptc (geo=4903)\n", - " aca_ptc/aca_ptc (geo=4904)\n", - " aca_ptc/aca_ptc (geo=5001)\n", - " aca_ptc/aca_ptc (geo=501)\n", - " aca_ptc/aca_ptc (geo=502)\n", - " aca_ptc/aca_ptc (geo=503)\n", - " aca_ptc/aca_ptc (geo=504)\n", - " aca_ptc/aca_ptc (geo=5101)\n", - " aca_ptc/aca_ptc (geo=5102)\n", - " aca_ptc/aca_ptc (geo=5103)\n", - " aca_ptc/aca_ptc (geo=5104)\n", - " aca_ptc/aca_ptc (geo=5105)\n", - " aca_ptc/aca_ptc (geo=5106)\n", - " aca_ptc/aca_ptc (geo=5107)\n", - " aca_ptc/aca_ptc (geo=5108)\n", - " aca_ptc/aca_ptc (geo=5109)\n", - " aca_ptc/aca_ptc (geo=5110)\n", - " aca_ptc/aca_ptc (geo=5111)\n", - " aca_ptc/aca_ptc (geo=5301)\n", - " aca_ptc/aca_ptc (geo=5302)\n", - " aca_ptc/aca_ptc (geo=5303)\n", - " aca_ptc/aca_ptc (geo=5304)\n", - " aca_ptc/aca_ptc (geo=5305)\n", - " aca_ptc/aca_ptc (geo=5306)\n", - " aca_ptc/aca_ptc (geo=5307)\n", - " aca_ptc/aca_ptc (geo=5308)\n", - " aca_ptc/aca_ptc (geo=5309)\n", - " aca_ptc/aca_ptc (geo=5310)\n", - " aca_ptc/aca_ptc (geo=5401)\n", - " aca_ptc/aca_ptc (geo=5402)\n", - " aca_ptc/aca_ptc (geo=5501)\n", - " aca_ptc/aca_ptc (geo=5502)\n", - " aca_ptc/aca_ptc (geo=5503)\n", - " aca_ptc/aca_ptc (geo=5504)\n", - " aca_ptc/aca_ptc (geo=5505)\n", - " aca_ptc/aca_ptc (geo=5506)\n", - " aca_ptc/aca_ptc (geo=5507)\n", - " aca_ptc/aca_ptc (geo=5508)\n", - " aca_ptc/aca_ptc (geo=5601)\n", - " aca_ptc/aca_ptc (geo=601)\n", - " aca_ptc/aca_ptc (geo=602)\n", - " aca_ptc/aca_ptc (geo=603)\n", - " aca_ptc/aca_ptc (geo=604)\n", - " aca_ptc/aca_ptc (geo=605)\n", - " aca_ptc/aca_ptc (geo=606)\n", - " aca_ptc/aca_ptc (geo=607)\n", - " aca_ptc/aca_ptc (geo=608)\n", - " aca_ptc/aca_ptc (geo=609)\n", - " aca_ptc/aca_ptc (geo=610)\n", - " aca_ptc/aca_ptc (geo=611)\n", - " aca_ptc/aca_ptc 
(geo=612)\n", - " aca_ptc/aca_ptc (geo=613)\n", - " aca_ptc/aca_ptc (geo=614)\n", - " aca_ptc/aca_ptc (geo=615)\n", - " aca_ptc/aca_ptc (geo=616)\n", - " aca_ptc/aca_ptc (geo=617)\n", - " aca_ptc/aca_ptc (geo=618)\n", - " aca_ptc/aca_ptc (geo=619)\n", - " aca_ptc/aca_ptc (geo=620)\n", - " aca_ptc/aca_ptc (geo=621)\n", - " aca_ptc/aca_ptc (geo=622)\n", - " aca_ptc/aca_ptc (geo=623)\n", - " aca_ptc/aca_ptc (geo=624)\n", - " aca_ptc/aca_ptc (geo=625)\n", - " aca_ptc/aca_ptc (geo=626)\n", - " aca_ptc/aca_ptc (geo=627)\n", - " aca_ptc/aca_ptc (geo=628)\n", - " aca_ptc/aca_ptc (geo=629)\n", - " aca_ptc/aca_ptc (geo=630)\n", - " aca_ptc/aca_ptc (geo=631)\n", - " aca_ptc/aca_ptc (geo=632)\n", - " aca_ptc/aca_ptc (geo=633)\n", - " aca_ptc/aca_ptc (geo=634)\n", - " aca_ptc/aca_ptc (geo=635)\n", - " aca_ptc/aca_ptc (geo=636)\n", - " aca_ptc/aca_ptc (geo=637)\n", - " aca_ptc/aca_ptc (geo=638)\n", - " aca_ptc/aca_ptc (geo=639)\n", - " aca_ptc/aca_ptc (geo=640)\n", - " aca_ptc/aca_ptc (geo=641)\n", - " aca_ptc/aca_ptc (geo=642)\n", - " aca_ptc/aca_ptc (geo=643)\n", - " aca_ptc/aca_ptc (geo=644)\n", - " aca_ptc/aca_ptc (geo=645)\n", - " aca_ptc/aca_ptc (geo=646)\n", - " aca_ptc/aca_ptc (geo=647)\n", - " aca_ptc/aca_ptc (geo=648)\n", - " aca_ptc/aca_ptc (geo=649)\n", - " aca_ptc/aca_ptc (geo=650)\n", - " aca_ptc/aca_ptc (geo=651)\n", - " aca_ptc/aca_ptc (geo=652)\n", - " aca_ptc/aca_ptc (geo=801)\n", - " aca_ptc/aca_ptc (geo=802)\n", - " aca_ptc/aca_ptc (geo=803)\n", - " aca_ptc/aca_ptc (geo=804)\n", - " aca_ptc/aca_ptc (geo=805)\n", - " aca_ptc/aca_ptc (geo=806)\n", - " aca_ptc/aca_ptc (geo=807)\n", - " aca_ptc/aca_ptc (geo=808)\n", - " aca_ptc/aca_ptc (geo=901)\n", - " aca_ptc/aca_ptc (geo=902)\n", - " aca_ptc/aca_ptc (geo=903)\n", - " aca_ptc/aca_ptc (geo=904)\n", - " aca_ptc/aca_ptc (geo=905)\n", - " aca_ptc/tax_unit_count (geo=1001)\n", - " aca_ptc/tax_unit_count (geo=101)\n", - " aca_ptc/tax_unit_count (geo=102)\n", - " aca_ptc/tax_unit_count (geo=103)\n", - " 
aca_ptc/tax_unit_count (geo=104)\n", - " aca_ptc/tax_unit_count (geo=105)\n", - " aca_ptc/tax_unit_count (geo=106)\n", - " aca_ptc/tax_unit_count (geo=107)\n", - " aca_ptc/tax_unit_count (geo=1101)\n", - " aca_ptc/tax_unit_count (geo=1201)\n", - " aca_ptc/tax_unit_count (geo=1202)\n", - " aca_ptc/tax_unit_count (geo=1203)\n", - " aca_ptc/tax_unit_count (geo=1204)\n", - " aca_ptc/tax_unit_count (geo=1205)\n", - " aca_ptc/tax_unit_count (geo=1206)\n", - " aca_ptc/tax_unit_count (geo=1207)\n", - " aca_ptc/tax_unit_count (geo=1208)\n", - " aca_ptc/tax_unit_count (geo=1209)\n", - " aca_ptc/tax_unit_count (geo=1210)\n", - " aca_ptc/tax_unit_count (geo=1211)\n", - " aca_ptc/tax_unit_count (geo=1212)\n", - " aca_ptc/tax_unit_count (geo=1213)\n", - " aca_ptc/tax_unit_count (geo=1214)\n", - " aca_ptc/tax_unit_count (geo=1215)\n", - " aca_ptc/tax_unit_count (geo=1216)\n", - " aca_ptc/tax_unit_count (geo=1217)\n", - " aca_ptc/tax_unit_count (geo=1218)\n", - " aca_ptc/tax_unit_count (geo=1219)\n", - " aca_ptc/tax_unit_count (geo=1220)\n", - " aca_ptc/tax_unit_count (geo=1221)\n", - " aca_ptc/tax_unit_count (geo=1222)\n", - " aca_ptc/tax_unit_count (geo=1223)\n", - " aca_ptc/tax_unit_count (geo=1224)\n", - " aca_ptc/tax_unit_count (geo=1225)\n", - " aca_ptc/tax_unit_count (geo=1226)\n", - " aca_ptc/tax_unit_count (geo=1227)\n", - " aca_ptc/tax_unit_count (geo=1228)\n", - " aca_ptc/tax_unit_count (geo=1301)\n", - " aca_ptc/tax_unit_count (geo=1302)\n", - " aca_ptc/tax_unit_count (geo=1303)\n", - " aca_ptc/tax_unit_count (geo=1304)\n", - " aca_ptc/tax_unit_count (geo=1305)\n", - " aca_ptc/tax_unit_count (geo=1306)\n", - " aca_ptc/tax_unit_count (geo=1307)\n", - " aca_ptc/tax_unit_count (geo=1308)\n", - " aca_ptc/tax_unit_count (geo=1309)\n", - " aca_ptc/tax_unit_count (geo=1310)\n", - " aca_ptc/tax_unit_count (geo=1311)\n", - " aca_ptc/tax_unit_count (geo=1312)\n", - " aca_ptc/tax_unit_count (geo=1313)\n", - " aca_ptc/tax_unit_count (geo=1314)\n", - " aca_ptc/tax_unit_count 
(geo=1501)\n", - " aca_ptc/tax_unit_count (geo=1502)\n", - " aca_ptc/tax_unit_count (geo=1601)\n", - " aca_ptc/tax_unit_count (geo=1602)\n", - " aca_ptc/tax_unit_count (geo=1701)\n", - " aca_ptc/tax_unit_count (geo=1702)\n", - " aca_ptc/tax_unit_count (geo=1703)\n", - " aca_ptc/tax_unit_count (geo=1704)\n", - " aca_ptc/tax_unit_count (geo=1705)\n", - " aca_ptc/tax_unit_count (geo=1706)\n", - " aca_ptc/tax_unit_count (geo=1707)\n", - " aca_ptc/tax_unit_count (geo=1708)\n", - " aca_ptc/tax_unit_count (geo=1709)\n", - " aca_ptc/tax_unit_count (geo=1710)\n", - " aca_ptc/tax_unit_count (geo=1711)\n", - " aca_ptc/tax_unit_count (geo=1712)\n", - " aca_ptc/tax_unit_count (geo=1713)\n", - " aca_ptc/tax_unit_count (geo=1714)\n", - " aca_ptc/tax_unit_count (geo=1715)\n", - " aca_ptc/tax_unit_count (geo=1716)\n", - " aca_ptc/tax_unit_count (geo=1717)\n", - " aca_ptc/tax_unit_count (geo=1801)\n", - " aca_ptc/tax_unit_count (geo=1802)\n", - " aca_ptc/tax_unit_count (geo=1803)\n", - " aca_ptc/tax_unit_count (geo=1804)\n", - " aca_ptc/tax_unit_count (geo=1805)\n", - " aca_ptc/tax_unit_count (geo=1806)\n", - " aca_ptc/tax_unit_count (geo=1807)\n", - " aca_ptc/tax_unit_count (geo=1808)\n", - " aca_ptc/tax_unit_count (geo=1809)\n", - " aca_ptc/tax_unit_count (geo=1901)\n", - " aca_ptc/tax_unit_count (geo=1902)\n", - " aca_ptc/tax_unit_count (geo=1903)\n", - " aca_ptc/tax_unit_count (geo=1904)\n", - " aca_ptc/tax_unit_count (geo=2001)\n", - " aca_ptc/tax_unit_count (geo=2002)\n", - " aca_ptc/tax_unit_count (geo=2003)\n", - " aca_ptc/tax_unit_count (geo=2004)\n", - " aca_ptc/tax_unit_count (geo=201)\n", - " aca_ptc/tax_unit_count (geo=2101)\n", - " aca_ptc/tax_unit_count (geo=2102)\n", - " aca_ptc/tax_unit_count (geo=2103)\n", - " aca_ptc/tax_unit_count (geo=2104)\n", - " aca_ptc/tax_unit_count (geo=2105)\n", - " aca_ptc/tax_unit_count (geo=2106)\n", - " aca_ptc/tax_unit_count (geo=2201)\n", - " aca_ptc/tax_unit_count (geo=2202)\n", - " aca_ptc/tax_unit_count (geo=2203)\n", - " 
aca_ptc/tax_unit_count (geo=2204)\n", - " aca_ptc/tax_unit_count (geo=2205)\n", - " aca_ptc/tax_unit_count (geo=2206)\n", - " aca_ptc/tax_unit_count (geo=2301)\n", - " aca_ptc/tax_unit_count (geo=2302)\n", - " aca_ptc/tax_unit_count (geo=2401)\n", - " aca_ptc/tax_unit_count (geo=2402)\n", - " aca_ptc/tax_unit_count (geo=2403)\n", - " aca_ptc/tax_unit_count (geo=2404)\n", - " aca_ptc/tax_unit_count (geo=2405)\n", - " aca_ptc/tax_unit_count (geo=2406)\n", - " aca_ptc/tax_unit_count (geo=2407)\n", - " aca_ptc/tax_unit_count (geo=2408)\n", - " aca_ptc/tax_unit_count (geo=2501)\n", - " aca_ptc/tax_unit_count (geo=2502)\n", - " aca_ptc/tax_unit_count (geo=2503)\n", - " aca_ptc/tax_unit_count (geo=2504)\n", - " aca_ptc/tax_unit_count (geo=2505)\n", - " aca_ptc/tax_unit_count (geo=2506)\n", - " aca_ptc/tax_unit_count (geo=2507)\n", - " aca_ptc/tax_unit_count (geo=2508)\n", - " aca_ptc/tax_unit_count (geo=2509)\n", - " aca_ptc/tax_unit_count (geo=2601)\n", - " aca_ptc/tax_unit_count (geo=2602)\n", - " aca_ptc/tax_unit_count (geo=2603)\n", - " aca_ptc/tax_unit_count (geo=2604)\n", - " aca_ptc/tax_unit_count (geo=2605)\n", - " aca_ptc/tax_unit_count (geo=2606)\n", - " aca_ptc/tax_unit_count (geo=2607)\n", - " aca_ptc/tax_unit_count (geo=2608)\n", - " aca_ptc/tax_unit_count (geo=2609)\n", - " aca_ptc/tax_unit_count (geo=2610)\n", - " aca_ptc/tax_unit_count (geo=2611)\n", - " aca_ptc/tax_unit_count (geo=2612)\n", - " aca_ptc/tax_unit_count (geo=2613)\n", - " aca_ptc/tax_unit_count (geo=2701)\n", - " aca_ptc/tax_unit_count (geo=2702)\n", - " aca_ptc/tax_unit_count (geo=2703)\n", - " aca_ptc/tax_unit_count (geo=2704)\n", - " aca_ptc/tax_unit_count (geo=2705)\n", - " aca_ptc/tax_unit_count (geo=2706)\n", - " aca_ptc/tax_unit_count (geo=2707)\n", - " aca_ptc/tax_unit_count (geo=2708)\n", - " aca_ptc/tax_unit_count (geo=2801)\n", - " aca_ptc/tax_unit_count (geo=2802)\n", - " aca_ptc/tax_unit_count (geo=2803)\n", - " aca_ptc/tax_unit_count (geo=2804)\n", - " aca_ptc/tax_unit_count 
(geo=2901)\n", - " aca_ptc/tax_unit_count (geo=2902)\n", - " aca_ptc/tax_unit_count (geo=2903)\n", - " aca_ptc/tax_unit_count (geo=2904)\n", - " aca_ptc/tax_unit_count (geo=2905)\n", - " aca_ptc/tax_unit_count (geo=2906)\n", - " aca_ptc/tax_unit_count (geo=2907)\n", - " aca_ptc/tax_unit_count (geo=2908)\n", - " aca_ptc/tax_unit_count (geo=3001)\n", - " aca_ptc/tax_unit_count (geo=3002)\n", - " aca_ptc/tax_unit_count (geo=3101)\n", - " aca_ptc/tax_unit_count (geo=3102)\n", - " aca_ptc/tax_unit_count (geo=3103)\n", - " aca_ptc/tax_unit_count (geo=3201)\n", - " aca_ptc/tax_unit_count (geo=3202)\n", - " aca_ptc/tax_unit_count (geo=3203)\n", - " aca_ptc/tax_unit_count (geo=3204)\n", - " aca_ptc/tax_unit_count (geo=3301)\n", - " aca_ptc/tax_unit_count (geo=3302)\n", - " aca_ptc/tax_unit_count (geo=3401)\n", - " aca_ptc/tax_unit_count (geo=3402)\n", - " aca_ptc/tax_unit_count (geo=3403)\n", - " aca_ptc/tax_unit_count (geo=3404)\n", - " aca_ptc/tax_unit_count (geo=3405)\n", - " aca_ptc/tax_unit_count (geo=3406)\n", - " aca_ptc/tax_unit_count (geo=3407)\n", - " aca_ptc/tax_unit_count (geo=3408)\n", - " aca_ptc/tax_unit_count (geo=3409)\n", - " aca_ptc/tax_unit_count (geo=3410)\n", - " aca_ptc/tax_unit_count (geo=3411)\n", - " aca_ptc/tax_unit_count (geo=3412)\n", - " aca_ptc/tax_unit_count (geo=3501)\n", - " aca_ptc/tax_unit_count (geo=3502)\n", - " aca_ptc/tax_unit_count (geo=3503)\n", - " aca_ptc/tax_unit_count (geo=3601)\n", - " aca_ptc/tax_unit_count (geo=3602)\n", - " aca_ptc/tax_unit_count (geo=3603)\n", - " aca_ptc/tax_unit_count (geo=3604)\n", - " aca_ptc/tax_unit_count (geo=3605)\n", - " aca_ptc/tax_unit_count (geo=3606)\n", - " aca_ptc/tax_unit_count (geo=3607)\n", - " aca_ptc/tax_unit_count (geo=3608)\n", - " aca_ptc/tax_unit_count (geo=3609)\n", - " aca_ptc/tax_unit_count (geo=3610)\n", - " aca_ptc/tax_unit_count (geo=3611)\n", - " aca_ptc/tax_unit_count (geo=3612)\n", - " aca_ptc/tax_unit_count (geo=3613)\n", - " aca_ptc/tax_unit_count (geo=3614)\n", - " 
aca_ptc/tax_unit_count (geo=3615)\n", - " aca_ptc/tax_unit_count (geo=3616)\n", - " aca_ptc/tax_unit_count (geo=3617)\n", - " aca_ptc/tax_unit_count (geo=3618)\n", - " aca_ptc/tax_unit_count (geo=3619)\n", - " aca_ptc/tax_unit_count (geo=3620)\n", - " aca_ptc/tax_unit_count (geo=3621)\n", - " aca_ptc/tax_unit_count (geo=3622)\n", - " aca_ptc/tax_unit_count (geo=3623)\n", - " aca_ptc/tax_unit_count (geo=3624)\n", - " aca_ptc/tax_unit_count (geo=3625)\n", - " aca_ptc/tax_unit_count (geo=3626)\n", - " aca_ptc/tax_unit_count (geo=3701)\n", - " aca_ptc/tax_unit_count (geo=3702)\n", - " aca_ptc/tax_unit_count (geo=3703)\n", - " aca_ptc/tax_unit_count (geo=3704)\n", - " aca_ptc/tax_unit_count (geo=3705)\n", - " aca_ptc/tax_unit_count (geo=3706)\n", - " aca_ptc/tax_unit_count (geo=3707)\n", - " aca_ptc/tax_unit_count (geo=3708)\n", - " aca_ptc/tax_unit_count (geo=3709)\n", - " aca_ptc/tax_unit_count (geo=3710)\n", - " aca_ptc/tax_unit_count (geo=3711)\n", - " aca_ptc/tax_unit_count (geo=3712)\n", - " aca_ptc/tax_unit_count (geo=3713)\n", - " aca_ptc/tax_unit_count (geo=3714)\n", - " aca_ptc/tax_unit_count (geo=3801)\n", - " aca_ptc/tax_unit_count (geo=3901)\n", - " aca_ptc/tax_unit_count (geo=3902)\n", - " aca_ptc/tax_unit_count (geo=3903)\n", - " aca_ptc/tax_unit_count (geo=3904)\n", - " aca_ptc/tax_unit_count (geo=3905)\n", - " aca_ptc/tax_unit_count (geo=3906)\n", - " aca_ptc/tax_unit_count (geo=3907)\n", - " aca_ptc/tax_unit_count (geo=3908)\n", - " aca_ptc/tax_unit_count (geo=3909)\n", - " aca_ptc/tax_unit_count (geo=3910)\n", - " aca_ptc/tax_unit_count (geo=3911)\n", - " aca_ptc/tax_unit_count (geo=3912)\n", - " aca_ptc/tax_unit_count (geo=3913)\n", - " aca_ptc/tax_unit_count (geo=3914)\n", - " aca_ptc/tax_unit_count (geo=3915)\n", - " aca_ptc/tax_unit_count (geo=4001)\n", - " aca_ptc/tax_unit_count (geo=4002)\n", - " aca_ptc/tax_unit_count (geo=4003)\n", - " aca_ptc/tax_unit_count (geo=4004)\n", - " aca_ptc/tax_unit_count (geo=4005)\n", - " aca_ptc/tax_unit_count 
(geo=401)\n", - " aca_ptc/tax_unit_count (geo=402)\n", - " aca_ptc/tax_unit_count (geo=403)\n", - " aca_ptc/tax_unit_count (geo=404)\n", - " aca_ptc/tax_unit_count (geo=405)\n", - " aca_ptc/tax_unit_count (geo=406)\n", - " aca_ptc/tax_unit_count (geo=407)\n", - " aca_ptc/tax_unit_count (geo=408)\n", - " aca_ptc/tax_unit_count (geo=409)\n", - " aca_ptc/tax_unit_count (geo=4101)\n", - " aca_ptc/tax_unit_count (geo=4102)\n", - " aca_ptc/tax_unit_count (geo=4103)\n", - " aca_ptc/tax_unit_count (geo=4104)\n", - " aca_ptc/tax_unit_count (geo=4105)\n", - " aca_ptc/tax_unit_count (geo=4106)\n", - " aca_ptc/tax_unit_count (geo=4201)\n", - " aca_ptc/tax_unit_count (geo=4202)\n", - " aca_ptc/tax_unit_count (geo=4203)\n", - " aca_ptc/tax_unit_count (geo=4204)\n", - " aca_ptc/tax_unit_count (geo=4205)\n", - " aca_ptc/tax_unit_count (geo=4206)\n", - " aca_ptc/tax_unit_count (geo=4207)\n", - " aca_ptc/tax_unit_count (geo=4208)\n", - " aca_ptc/tax_unit_count (geo=4209)\n", - " aca_ptc/tax_unit_count (geo=4210)\n", - " aca_ptc/tax_unit_count (geo=4211)\n", - " aca_ptc/tax_unit_count (geo=4212)\n", - " aca_ptc/tax_unit_count (geo=4213)\n", - " aca_ptc/tax_unit_count (geo=4214)\n", - " aca_ptc/tax_unit_count (geo=4215)\n", - " aca_ptc/tax_unit_count (geo=4216)\n", - " aca_ptc/tax_unit_count (geo=4217)\n", - " aca_ptc/tax_unit_count (geo=4401)\n", - " aca_ptc/tax_unit_count (geo=4402)\n", - " aca_ptc/tax_unit_count (geo=4501)\n", - " aca_ptc/tax_unit_count (geo=4502)\n", - " aca_ptc/tax_unit_count (geo=4503)\n", - " aca_ptc/tax_unit_count (geo=4504)\n", - " aca_ptc/tax_unit_count (geo=4505)\n", - " aca_ptc/tax_unit_count (geo=4506)\n", - " aca_ptc/tax_unit_count (geo=4507)\n", - " aca_ptc/tax_unit_count (geo=4601)\n", - " aca_ptc/tax_unit_count (geo=4701)\n", - " aca_ptc/tax_unit_count (geo=4702)\n", - " aca_ptc/tax_unit_count (geo=4703)\n", - " aca_ptc/tax_unit_count (geo=4704)\n", - " aca_ptc/tax_unit_count (geo=4705)\n", - " aca_ptc/tax_unit_count (geo=4706)\n", - " 
aca_ptc/tax_unit_count (geo=4707)\n", - " aca_ptc/tax_unit_count (geo=4708)\n", - " aca_ptc/tax_unit_count (geo=4709)\n", - " aca_ptc/tax_unit_count (geo=4801)\n", - " aca_ptc/tax_unit_count (geo=4802)\n", - " aca_ptc/tax_unit_count (geo=4803)\n", - " aca_ptc/tax_unit_count (geo=4804)\n", - " aca_ptc/tax_unit_count (geo=4805)\n", - " aca_ptc/tax_unit_count (geo=4806)\n", - " aca_ptc/tax_unit_count (geo=4807)\n", - " aca_ptc/tax_unit_count (geo=4808)\n", - " aca_ptc/tax_unit_count (geo=4809)\n", - " aca_ptc/tax_unit_count (geo=4810)\n", - " aca_ptc/tax_unit_count (geo=4811)\n", - " aca_ptc/tax_unit_count (geo=4812)\n", - " aca_ptc/tax_unit_count (geo=4813)\n", - " aca_ptc/tax_unit_count (geo=4814)\n", - " aca_ptc/tax_unit_count (geo=4815)\n", - " aca_ptc/tax_unit_count (geo=4816)\n", - " aca_ptc/tax_unit_count (geo=4817)\n", - " aca_ptc/tax_unit_count (geo=4818)\n", - " aca_ptc/tax_unit_count (geo=4819)\n", - " aca_ptc/tax_unit_count (geo=4820)\n", - " aca_ptc/tax_unit_count (geo=4821)\n", - " aca_ptc/tax_unit_count (geo=4822)\n", - " aca_ptc/tax_unit_count (geo=4823)\n", - " aca_ptc/tax_unit_count (geo=4824)\n", - " aca_ptc/tax_unit_count (geo=4825)\n", - " aca_ptc/tax_unit_count (geo=4826)\n", - " aca_ptc/tax_unit_count (geo=4827)\n", - " aca_ptc/tax_unit_count (geo=4828)\n", - " aca_ptc/tax_unit_count (geo=4829)\n", - " aca_ptc/tax_unit_count (geo=4830)\n", - " aca_ptc/tax_unit_count (geo=4831)\n", - " aca_ptc/tax_unit_count (geo=4832)\n", - " aca_ptc/tax_unit_count (geo=4833)\n", - " aca_ptc/tax_unit_count (geo=4834)\n", - " aca_ptc/tax_unit_count (geo=4835)\n", - " aca_ptc/tax_unit_count (geo=4836)\n", - " aca_ptc/tax_unit_count (geo=4837)\n", - " aca_ptc/tax_unit_count (geo=4838)\n", - " aca_ptc/tax_unit_count (geo=4901)\n", - " aca_ptc/tax_unit_count (geo=4902)\n", - " aca_ptc/tax_unit_count (geo=4903)\n", - " aca_ptc/tax_unit_count (geo=4904)\n", - " aca_ptc/tax_unit_count (geo=5001)\n", - " aca_ptc/tax_unit_count (geo=501)\n", - " aca_ptc/tax_unit_count 
(geo=502)\n", - " aca_ptc/tax_unit_count (geo=503)\n", - " aca_ptc/tax_unit_count (geo=504)\n", - " aca_ptc/tax_unit_count (geo=5101)\n", - " aca_ptc/tax_unit_count (geo=5102)\n", - " aca_ptc/tax_unit_count (geo=5103)\n", - " aca_ptc/tax_unit_count (geo=5104)\n", - " aca_ptc/tax_unit_count (geo=5105)\n", - " aca_ptc/tax_unit_count (geo=5106)\n", - " aca_ptc/tax_unit_count (geo=5107)\n", - " aca_ptc/tax_unit_count (geo=5108)\n", - " aca_ptc/tax_unit_count (geo=5109)\n", - " aca_ptc/tax_unit_count (geo=5110)\n", - " aca_ptc/tax_unit_count (geo=5111)\n", - " aca_ptc/tax_unit_count (geo=5301)\n", - " aca_ptc/tax_unit_count (geo=5302)\n", - " aca_ptc/tax_unit_count (geo=5303)\n", - " aca_ptc/tax_unit_count (geo=5304)\n", - " aca_ptc/tax_unit_count (geo=5305)\n", - " aca_ptc/tax_unit_count (geo=5306)\n", - " aca_ptc/tax_unit_count (geo=5307)\n", - " aca_ptc/tax_unit_count (geo=5308)\n", - " aca_ptc/tax_unit_count (geo=5309)\n", - " aca_ptc/tax_unit_count (geo=5310)\n", - " aca_ptc/tax_unit_count (geo=5401)\n", - " aca_ptc/tax_unit_count (geo=5402)\n", - " aca_ptc/tax_unit_count (geo=5501)\n", - " aca_ptc/tax_unit_count (geo=5502)\n", - " aca_ptc/tax_unit_count (geo=5503)\n", - " aca_ptc/tax_unit_count (geo=5504)\n", - " aca_ptc/tax_unit_count (geo=5505)\n", - " aca_ptc/tax_unit_count (geo=5506)\n", - " aca_ptc/tax_unit_count (geo=5507)\n", - " aca_ptc/tax_unit_count (geo=5508)\n", - " aca_ptc/tax_unit_count (geo=5601)\n", - " aca_ptc/tax_unit_count (geo=601)\n", - " aca_ptc/tax_unit_count (geo=602)\n", - " aca_ptc/tax_unit_count (geo=603)\n", - " aca_ptc/tax_unit_count (geo=604)\n", - " aca_ptc/tax_unit_count (geo=605)\n", - " aca_ptc/tax_unit_count (geo=606)\n", - " aca_ptc/tax_unit_count (geo=607)\n", - " aca_ptc/tax_unit_count (geo=608)\n", - " aca_ptc/tax_unit_count (geo=609)\n", - " aca_ptc/tax_unit_count (geo=610)\n", - " aca_ptc/tax_unit_count (geo=611)\n", - " aca_ptc/tax_unit_count (geo=612)\n", - " aca_ptc/tax_unit_count (geo=613)\n", - " aca_ptc/tax_unit_count 
(geo=614)\n", - " aca_ptc/tax_unit_count (geo=615)\n", - " aca_ptc/tax_unit_count (geo=616)\n", - " aca_ptc/tax_unit_count (geo=617)\n", - " aca_ptc/tax_unit_count (geo=618)\n", - " aca_ptc/tax_unit_count (geo=619)\n", - " aca_ptc/tax_unit_count (geo=620)\n", - " aca_ptc/tax_unit_count (geo=621)\n", - " aca_ptc/tax_unit_count (geo=622)\n", - " aca_ptc/tax_unit_count (geo=623)\n", - " aca_ptc/tax_unit_count (geo=624)\n", - " aca_ptc/tax_unit_count (geo=625)\n", - " aca_ptc/tax_unit_count (geo=626)\n", - " aca_ptc/tax_unit_count (geo=627)\n", - " aca_ptc/tax_unit_count (geo=628)\n", - " aca_ptc/tax_unit_count (geo=629)\n", - " aca_ptc/tax_unit_count (geo=630)\n", - " aca_ptc/tax_unit_count (geo=631)\n", - " aca_ptc/tax_unit_count (geo=632)\n", - " aca_ptc/tax_unit_count (geo=633)\n", - " aca_ptc/tax_unit_count (geo=634)\n", - " aca_ptc/tax_unit_count (geo=635)\n", - " aca_ptc/tax_unit_count (geo=636)\n", - " aca_ptc/tax_unit_count (geo=637)\n", - " aca_ptc/tax_unit_count (geo=638)\n", - " aca_ptc/tax_unit_count (geo=639)\n", - " aca_ptc/tax_unit_count (geo=640)\n", - " aca_ptc/tax_unit_count (geo=641)\n", - " aca_ptc/tax_unit_count (geo=642)\n", - " aca_ptc/tax_unit_count (geo=643)\n", - " aca_ptc/tax_unit_count (geo=644)\n", - " aca_ptc/tax_unit_count (geo=645)\n", - " aca_ptc/tax_unit_count (geo=646)\n", - " aca_ptc/tax_unit_count (geo=647)\n", - " aca_ptc/tax_unit_count (geo=648)\n", - " aca_ptc/tax_unit_count (geo=649)\n", - " aca_ptc/tax_unit_count (geo=650)\n", - " aca_ptc/tax_unit_count (geo=651)\n", - " aca_ptc/tax_unit_count (geo=652)\n", - " aca_ptc/tax_unit_count (geo=801)\n", - " aca_ptc/tax_unit_count (geo=802)\n", - " aca_ptc/tax_unit_count (geo=803)\n", - " aca_ptc/tax_unit_count (geo=804)\n", - " aca_ptc/tax_unit_count (geo=805)\n", - " aca_ptc/tax_unit_count (geo=806)\n", - " aca_ptc/tax_unit_count (geo=807)\n", - " aca_ptc/tax_unit_count (geo=808)\n", - " aca_ptc/tax_unit_count (geo=901)\n", - " aca_ptc/tax_unit_count (geo=902)\n", - " 
aca_ptc/tax_unit_count (geo=903)\n", - " aca_ptc/tax_unit_count (geo=904)\n", - " aca_ptc/tax_unit_count (geo=905)\n" + "Achievable targets: 479\n", + "Impossible targets: 881\n", + "\n", + "Impossible targets by (domain, variable):\n", + " aca_ptc/aca_ptc: 436\n", + " aca_ptc/tax_unit_count: 436\n", + " snap/household_count: 7\n", + " aca_ptc/person_count: 1\n", + " snap/snap: 1\n" ] } ], @@ -1647,12 +636,15 @@ "\n", "if n_impossible > 0:\n", " impossible = targets_filtered[~achievable_mask]\n", - " print(\"\\nImpossible targets:\")\n", - " for _, r in impossible.iterrows():\n", - " print(\n", - " f\" {r.get('domain_variable', '?')}/{r['variable']} \"\n", - " f\"(geo={r['geographic_id']})\"\n", - " )" + " by_var = (\n", + " impossible.groupby([\"domain_variable\", \"variable\"])\n", + " .agg(count=(\"value\", \"size\"))\n", + " .reset_index()\n", + " .sort_values(\"count\", ascending=False)\n", + " )\n", + " print(\"\\nImpossible targets by (domain, variable):\")\n", + " for _, r in by_var.iterrows():\n", + " print(f\" {r['domain_variable']}/{r['variable']}: {r['count']}\")" ] }, { @@ -1665,11 +657,11 @@ "output_type": "stream", "text": [ "Hardest targets (lowest row_sum / target_value ratio):\n", - " snap/household_count (geo=3615): ratio=0.0088, row_sum=1,535, target=173,591\n", - " snap/household_count (geo=3613): ratio=0.0110, row_sum=1,535, target=139,162\n", - " snap/household_count (geo=621): ratio=0.0124, row_sum=1,483, target=119,148\n", - " snap/household_count (geo=3608): ratio=0.0129, row_sum=1,535, target=118,977\n", - " snap/household_count (geo=634): ratio=0.0130, row_sum=1,483, target=113,916\n" + " snap/household_count (geo=621): ratio=0.0000, row_sum=4, target=119,148\n", + " snap/household_count (geo=3615): ratio=0.0001, row_sum=9, target=173,591\n", + " snap/snap (geo=46): ratio=0.0001, row_sum=9,421, target=180,195,817\n", + " snap/household_count (geo=3625): ratio=0.0001, row_sum=4, target=67,315\n", + " snap/household_count (geo=1702): 
ratio=0.0001, row_sum=6, target=97,494\n" ] } ], @@ -1700,9 +692,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Final matrix shape: (487, 5231564)\n", - "Final non-zero entries: 1,466,022\n", - "Final density: 0.000575\n", + "Final matrix shape: (479, 35997)\n", + "Final non-zero entries: 9,944\n", + "Final density: 0.000577\n", "\n", "This is what the optimizer receives.\n" ] @@ -1724,10 +716,10 @@ "\n", "The calibration matrix pipeline has five steps:\n", "\n", - "1. **Build** — `SparseMatrixBuilder.build_matrix()` queries targets, applies hierarchical uprating, evaluates constraints, and assembles the sparse CSR matrix.\n", - "2. **Read** — `MatrixTracer` decodes rows (targets) and columns (household-CD pairs) so you can verify the matrix makes sense.\n", + "1. **Clone + assign** — `assign_random_geography()` creates N clones of each CPS record, each with a random census block (and derived CD/state).\n", + "2. **Build** — `UnifiedMatrixBuilder.build_matrix()` queries targets, applies hierarchical uprating, simulates each clone with its assigned geography, and assembles the sparse CSR matrix.\n", "3. **Groups** — `create_target_groups()` partitions rows for balanced loss weighting. `GROUPS_TO_EXCLUDE` drops redundant constraints.\n", - "4. **Sparsity** — Most of the matrix is zero. District-level targets confine non-zeros to single CD blocks; national targets span all blocks.\n", + "4. **Sparsity** — Most of the matrix is zero. District-level targets confine non-zeros to clones assigned to that district; national targets span all clones.\n", "5. **Filter** — Remove impossible targets (row sum = 0) before handing to the optimizer.\n", "\n", "When adding new domains or variables to the calibration, re-run this notebook to verify the new targets appear correctly and don't introduce impossible constraints." 
@@ -1755,4 +747,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/docs/hierarchical_uprating.ipynb b/docs/hierarchical_uprating.ipynb index 76530225c..4da30d82c 100644 --- a/docs/hierarchical_uprating.ipynb +++ b/docs/hierarchical_uprating.ipynb @@ -51,20 +51,16 @@ "import pandas as pd\n", "\n", "from policyengine_us_data.storage import STORAGE_FOLDER\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import (\n", - " SparseMatrixBuilder,\n", + "from policyengine_us_data.calibration.unified_matrix_builder import (\n", + " UnifiedMatrixBuilder,\n", ")\n", "from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n", - " get_all_cds_from_database,\n", " STATE_CODES,\n", ")\n", "\n", "db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n", "db_uri = f\"sqlite:///{db_path}\"\n", - "cds = get_all_cds_from_database(db_uri)\n", - "builder = SparseMatrixBuilder(\n", - " db_uri, time_period=2024, cds_to_calibrate=cds\n", - ")" + "builder = UnifiedMatrixBuilder(db_uri, time_period=2024)" ] }, { diff --git a/docs/local_area_calibration_setup.ipynb b/docs/local_area_calibration_setup.ipynb index c21264e9a..2e8614aa9 100644 --- a/docs/local_area_calibration_setup.ipynb +++ b/docs/local_area_calibration_setup.ipynb @@ -7,7 +7,21 @@ "source": [ "# Local Area Calibration Setup\n", "\n", - "This notebook demonstrates the sparse matrix construction for local area (congressional district) calibration. It uses a subset of CDs from NC, HI, MT, and AK for manageable runtime." 
+ "This notebook demonstrates the clone-based calibration pipeline: how raw CPS records become a calibration matrix and, ultimately, CD-level stacked datasets.\n", + "\n", + "The paradigm shift from the old approach: instead of replicating every household into every congressional district, we **clone** each record N times and assign each clone a **random census block** drawn from a population-weighted distribution. Each clone inherits a state, CD, and block — and gets re-simulated under the rules of its assigned state.\n", + "\n", + "We follow one household (`record_idx=8629`, household_id 128694, SNAP \\$18,396) through the entire pipeline:\n", + "1. Clone and assign geography\n", + "2. Simulate under new state rules (`_simulate_clone`)\n", + "3. Geographic column masking\n", + "4. Re-randomize takeup per census block\n", + "5. Build the calibration matrix\n", + "6. Create stacked datasets from calibrated weights\n", + "\n", + "**Companion notebook:** [calibration_matrix.ipynb](calibration_matrix.ipynb) covers the *finished* matrix — row/column anatomy, target groups, sparsity. This notebook covers the *process* that creates it and what happens after (stacked datasets).\n", + "\n", + "**Requirements:** `policy_data.db`, `block_cd_distributions.csv.gz`, and the stratified CPS h5 file in `STORAGE_FOLDER`." ] }, { @@ -23,24 +37,52 @@ "execution_count": 1, "id": "cell-2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/baogorek/envs/sep/lib/python3.13/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ - "from sqlalchemy import create_engine, text\n", - "import pandas as pd\n", "import numpy as np\n", + "import pandas as pd\n", + "from collections import defaultdict\n", "\n", "from policyengine_us import Microsimulation\n", "from policyengine_us_data.storage import STORAGE_FOLDER\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import (\n", - " SparseMatrixBuilder,\n", + "from policyengine_us_data.calibration.clone_and_assign import (\n", + " assign_random_geography,\n", + " GeographyAssignment,\n", + " load_global_block_distribution,\n", + ")\n", + "from policyengine_us_data.calibration.unified_matrix_builder import (\n", + " UnifiedMatrixBuilder,\n", ")\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.matrix_tracer import (\n", - " MatrixTracer,\n", + "from policyengine_us_data.calibration.unified_calibration import (\n", + " rerandomize_takeup,\n", + " SIMPLE_TAKEUP_VARS,\n", ")\n", + "from policyengine_us_data.utils.randomness import seeded_rng\n", + "from policyengine_us_data.parameters import load_take_up_rate\n", "from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import (\n", " get_calculated_variables,\n", - " create_target_groups,\n", - ")" + " STATE_CODES,\n", + " get_all_cds_from_database,\n", + ")\n", + "from policyengine_us_data.datasets.cps.local_area_calibration.stacked_dataset_builder import (\n", + " create_sparse_cd_stacked_dataset,\n", + ")\n", + "\n", + "db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n", + "db_uri = f\"sqlite:///{db_path}\"\n", + "dataset_path = str(STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\")\n", + "\n", + "N_CLONES = 3\n", + "SEED = 42" ] }, { @@ -48,13 +90,30 @@ "execution_count": 2, "id": "cell-3", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Base dataset: 11,999 households\n", + "Example household: record_idx=8629, household_id=128694, SNAP=$18,396.00\n" + ] + } + ], "source": [ - "db_path = STORAGE_FOLDER / \"calibration\" / \"policy_data.db\"\n", - "db_uri = f\"sqlite:///{db_path}\"\n", - "dataset_path = str(STORAGE_FOLDER / \"stratified_extended_cps_2024.h5\")\n", + "sim = Microsimulation(dataset=dataset_path)\n", + "hh_ids = sim.calculate(\"household_id\", map_to=\"household\").values\n", + "snap_values = sim.calculate(\"snap\", map_to=\"household\").values\n", + "n_records = len(hh_ids)\n", "\n", - "engine = create_engine(db_uri)" + "record_idx = 8629 # High SNAP ($18k), lands in TX/PA/NY with seed=42\n", + "example_hh_id = hh_ids[record_idx]\n", + "print(f\"Base dataset: {n_records:,} households\")\n", + "print(\n", + " f\"Example household: record_idx={record_idx}, \"\n", + " f\"household_id={example_hh_id}, \"\n", + " f\"SNAP=${snap_values[record_idx]:,.2f}\"\n", + ")" ] }, { @@ -62,13 +121,9 @@ "id": "cell-4", "metadata": {}, "source": [ - "## Section 2: Select Test Congressional Districts\n", + "## Section 2: Geography Assignment\n", "\n", - "We use CDs from 4 states for testing:\n", - "- **NC (37)**: 14 CDs (3701-3714) - provides same-state different-CD test cases\n", - "- **HI (15)**: 2 CDs (1501-1502)\n", - "- **MT (30)**: 2 CDs (3001-3002)\n", - "- **AK (2)**: 1 CD (200)" + "`assign_random_geography` creates `n_records * n_clones` total records, each assigned a random census block from a population-weighted distribution. State and CD are derived from the block GEOID. The result is a `GeographyAssignment` dataclass with arrays indexed as `clone_idx * n_records + record_idx`." 
] }, { @@ -81,557 +136,850 @@ "name": "stdout", "output_type": "stream", "text": [ - "Testing with 19 congressional districts:\n", - " NC (37): ['3701', '3702', '3703', '3704', '3705', '3706', '3707', '3708', '3709', '3710', '3711', '3712', '3713', '3714']\n", - " HI (15): ['1501', '1502']\n", - " MT (30): ['3001', '3002']\n", - " AK (2): ['201']\n" + "Total cloned records: 35,997\n", + "Unique states: 50\n", + "Unique CDs: 435\n", + "Unique blocks: 35508\n" ] } ], "source": [ - "query = \"\"\"\n", - "SELECT DISTINCT sc.value as cd_geoid\n", - "FROM stratum_constraints sc\n", - "WHERE sc.constraint_variable = 'congressional_district_geoid'\n", - " AND (\n", - " sc.value LIKE '37__'\n", - " OR sc.value LIKE '150_'\n", - " OR sc.value LIKE '300_'\n", - " OR sc.value = '200' OR sc.value = '201'\n", - " )\n", - "ORDER BY sc.value\n", - "\"\"\"\n", - "\n", - "with engine.connect() as conn:\n", - " result = conn.execute(text(query)).fetchall()\n", - " test_cds = [row[0] for row in result]\n", - "\n", - "print(f\"Testing with {len(test_cds)} congressional districts:\")\n", - "print(f\" NC (37): {[cd for cd in test_cds if cd.startswith('37')]}\")\n", - "print(f\" HI (15): {[cd for cd in test_cds if cd.startswith('15')]}\")\n", - "print(f\" MT (30): {[cd for cd in test_cds if cd.startswith('30')]}\")\n", - "print(f\" AK (2): {[cd for cd in test_cds if cd.startswith('20')]}\")" - ] - }, - { - "cell_type": "markdown", - "id": "cell-6", - "metadata": {}, - "source": [ - "## Section 3: Build the Sparse Matrix\n", + "geography = assign_random_geography(n_records, n_clones=N_CLONES, seed=SEED)\n", + "n_total = n_records * N_CLONES\n", "\n", - "The sparse matrix `X_sparse` has:\n", - "- **Rows**: Calibration targets (e.g., SNAP totals by geography)\n", - "- **Columns**: (household × CD) pairs - each household appears once per CD\n", - "\n", - "We filter to SNAP targets using the `domain_variables` filter for this demonstration." 
+ "print(f\"Total cloned records: {n_total:,}\")\n", + "print(f\"Unique states: {len(np.unique(geography.state_fips))}\")\n", + "print(f\"Unique CDs: {len(np.unique(geography.cd_geoid))}\")\n", + "print(f\"Unique blocks: {len(np.unique(geography.block_geoid))}\")" ] }, { "cell_type": "code", "execution_count": 4, - "id": "cell-7", + "id": "cell-6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "X_sparse shape: (539, 227981)\n", - " Rows (targets): 539\n", - " Columns (household × CD pairs): 227981\n", - " Non-zero entries: 141,536\n", - " Sparsity: 99.88%\n" + "Example household (record_idx=8629) across 3 clones:\n", + "\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clonecolstate_fipsabbrcd_geoidblock_geoid
00862948TX4817481450004002026
112062842PA4201420171058013029
223262736NY3611360850208041023
\n", + "
" + ], + "text/plain": [ + " clone col state_fips abbr cd_geoid block_geoid\n", + "0 0 8629 48 TX 4817 481450004002026\n", + "1 1 20628 42 PA 4201 420171058013029\n", + "2 2 32627 36 NY 3611 360850208041023" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "sim = Microsimulation(dataset=dataset_path)\n", - "\n", - "builder = SparseMatrixBuilder(\n", - " db_uri,\n", - " time_period=2024,\n", - " cds_to_calibrate=test_cds,\n", - " dataset_path=dataset_path,\n", + "print(\n", + " f\"Example household (record_idx={record_idx}) across {N_CLONES} clones:\\n\"\n", ")\n", - "\n", - "targets_df, X_sparse, household_id_mapping = builder.build_matrix(\n", - " sim, target_filter={\"domain_variables\": [\"snap\"], \"variables\": [\"snap\"]}\n", - ")\n", - "\n", - "print(f\"X_sparse shape: {X_sparse.shape}\")\n", - "print(f\" Rows (targets): {X_sparse.shape[0]}\")\n", - "print(f\" Columns (household × CD pairs): {X_sparse.shape[1]}\")\n", - "print(f\" Non-zero entries: {X_sparse.nnz:,}\")\n", - "print(f\" Sparsity: {1 - X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.2%}\")" + "rows = []\n", + "for c in range(N_CLONES):\n", + " col = c * n_records + record_idx\n", + " rows.append(\n", + " {\n", + " \"clone\": c,\n", + " \"col\": col,\n", + " \"state_fips\": geography.state_fips[col],\n", + " \"abbr\": STATE_CODES.get(geography.state_fips[col], \"??\"),\n", + " \"cd_geoid\": geography.cd_geoid[col],\n", + " \"block_geoid\": geography.block_geoid[col],\n", + " }\n", + " )\n", + "pd.DataFrame(rows)" ] }, { "cell_type": "markdown", - "id": "cell-8", + "id": "cell-7", "metadata": {}, "source": [ - "## Section 4: Understanding the Matrix Structure with MatrixTracer\n", + "One household, three parallel geographic identities. 
Each clone will be simulated under different state rules, producing different benefit amounts.\n", "\n", - "The `MatrixTracer` helps navigate the sparse matrix by providing lookups between:\n", - "- Column indices ↔ (household_id, CD) pairs\n", - "- Row indices ↔ target definitions" + "**Note:** With only N_CLONES=3 (~36K total samples), small-population areas like DC may not appear in the random draw. The production pipeline uses N_CLONES=10, which covers all 51 state-equivalents and 436 CDs." ] }, { "cell_type": "code", "execution_count": 5, - "id": "cell-9", + "id": "cell-8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "================================================================================\n", - "MATRIX STRUCTURE BREAKDOWN\n", - "================================================================================\n", - "\n", - "Matrix dimensions: 539 rows x 227981 columns\n", - " Rows = 539 targets\n", - " Columns = 11999 households x 19 CDs\n", - " = 11,999 x 19 = 227,981\n", - "\n", - "--------------------------------------------------------------------------------\n", - "COLUMN STRUCTURE (Households stacked by CD)\n", - "--------------------------------------------------------------------------------\n", - "\n", - "Showing first and last 5 CDs of 19 total:\n", - "\n", - "First 5 CDs:\n", - "cd_geoid start_col end_col n_households\n", - " 1501 0 11998 11999\n", - " 1502 11999 23997 11999\n", - " 201 23998 35996 11999\n", - " 3001 35997 47995 11999\n", - " 3002 47996 59994 11999\n", - "\n", - "Last 5 CDs:\n", - "cd_geoid start_col end_col n_households\n", - " 3710 167986 179984 11999\n", - " 3711 179985 191983 11999\n", - " 3712 191984 203982 11999\n", - " 3713 203983 215981 11999\n", - " 3714 215982 227980 11999\n", - "\n", - "--------------------------------------------------------------------------------\n", - "ROW STRUCTURE (Targets)\n", - 
"--------------------------------------------------------------------------------\n", - "\n", - "Total targets: 539\n", - "\n", - "Targets by domain variable:\n", - " n_targets n_unique_vars\n", - "domain_variable \n", - "snap 538 2\n", - "\n", - "--------------------------------------------------------------------------------\n", - "TARGET GROUPS (for loss calculation)\n", - "--------------------------------------------------------------------------------\n", - "\n", - "=== Creating Target Groups ===\n", - "\n", - "National targets:\n", - " Group 0: Snap = 93,730,290,000\n", - "\n", - "State targets:\n", - " Group 1: SNAP Household Count (51 targets)\n", - " Group 2: Snap (51 targets)\n", - "\n", - "District targets:\n", - " Group 3: SNAP Household Count (436 targets)\n", - "\n", - "Total groups created: 4\n", - "========================================\n", - " Group 0: National Snap (1 target, value=93,730,290,000) - rows [0]\n", - " Group 1: State SNAP Household Count (51 targets) - rows [1, 2, 3, ..., 50, 51]\n", - " Group 2: State Snap (51 targets) - rows [52, 53, 54, ..., 101, 102]\n", - " Group 3: District SNAP Household Count (436 targets) - rows [103, 104, 105, ..., 537, 538]\n", - "\n", - "================================================================================\n" + "Global block distribution: 5,765,442 blocks\n", + "Top 5 states by total probability:\n", + " CA (6): 11.954%\n", + " TX (48): 8.736%\n", + " FL (12): 6.437%\n", + " NY (36): 5.977%\n", + " PA (42): 3.908%\n" ] } ], "source": [ - "tracer = MatrixTracer(\n", - " targets_df, X_sparse, household_id_mapping, test_cds, sim\n", - ")\n", + "blocks, cds, states, probs = load_global_block_distribution()\n", + "print(f\"Global block distribution: {len(blocks):,} blocks\")\n", + "print(f\"Top 5 states by total probability:\")\n", + "state_prob = pd.Series(probs, index=states).groupby(level=0).sum()\n", + "top5 = state_prob.nlargest(5)\n", + "for fips, p in top5.items():\n", + " print(f\" 
{STATE_CODES.get(fips, '??')} ({fips}): {p:.3%}\")" + ] + }, + { + "cell_type": "markdown", + "id": "cell-9", + "metadata": {}, + "source": [ + "## Section 3: Inside `_simulate_clone` — State-Swap\n", + "\n", + "For each clone, `_simulate_clone` does four things:\n", + "1. Creates a **fresh** `Microsimulation` from the base dataset\n", + "2. Overwrites `state_fips` with the clone's assigned states\n", + "3. Optionally calls a `sim_modifier` (e.g., takeup re-randomization)\n", + "4. **Clears cached formulas** via `get_calculated_variables` — preserving survey inputs and IDs while forcing recalculation of state-dependent variables like SNAP\n", "\n", - "tracer.print_matrix_structure()" + "Let's reproduce this manually for clone 0." ] }, { "cell_type": "code", "execution_count": 6, - "id": "cell-11", + "id": "cell-10", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\n", - "=== Creating Target Groups ===\n", - "\n", - "National targets:\n", - " Group 0: Snap = 93,730,290,000\n", - "\n", - "State targets:\n", - " Group 1: SNAP Household Count (51 targets)\n", - " Group 2: Snap (51 targets)\n", - "\n", - "District targets:\n", - " Group 3: SNAP Household Count (436 targets)\n", - "\n", - "Total groups created: 4\n", - "========================================\n" + "Example household (record_idx=8629):\n", + " Original state: NC (37)\n", + " Clone 0 state: TX (48)\n", + " Original SNAP: $18,396.00\n", + " Clone 0 SNAP: $18,396.00\n" ] } ], "source": [ - "target_groups, group_info = create_target_groups(targets_df)" + "clone_idx = 0\n", + "col_start = clone_idx * n_records\n", + "col_end = col_start + n_records\n", + "clone_states = geography.state_fips[col_start:col_end]\n", + "\n", + "clone_sim = Microsimulation(dataset=dataset_path)\n", + "clone_sim.set_input(\"state_fips\", 2024, clone_states.astype(np.int32))\n", + "for var in get_calculated_variables(clone_sim):\n", + " clone_sim.delete_arrays(var)\n", + "\n", + "new_snap = 
clone_sim.calculate(\"snap\", map_to=\"household\").values\n", + "\n", + "orig_state = sim.calculate(\"state_fips\", map_to=\"household\").values[record_idx]\n", + "new_state = clone_states[record_idx]\n", + "\n", + "print(f\"Example household (record_idx={record_idx}):\")\n", + "print(\n", + " f\" Original state: {STATE_CODES.get(int(orig_state), '??')} \"\n", + " f\"({int(orig_state)})\"\n", + ")\n", + "print(\n", + " f\" Clone 0 state: {STATE_CODES.get(int(new_state), '??')} \"\n", + " f\"({int(new_state)})\"\n", + ")\n", + "print(f\" Original SNAP: ${snap_values[record_idx]:,.2f}\")\n", + "print(f\" Clone 0 SNAP: ${new_snap[record_idx]:,.2f}\")" ] }, { "cell_type": "code", "execution_count": 7, - "id": "7e75756b-a317-4800-bac5-e0fd6bc43b8c", + "id": "cell-11", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Row info for North Carolina's SNAP benefit amount:\n", - "{'row_index': 80, 'variable': 'snap', 'variable_desc': 'SNAP allotment', 'geographic_id': '37', 'target_value': 2934626410.0, 'stratum_id': 9363, 'domain_variable': 'snap'}\n" + "SNAP for record_idx=8629 across all 3 clones:\n", + "\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
clonestatestate_fipsSNAP
00TX48$18,396.00
11PA42$18,396.00
22NY36$18,396.00
\n", + "
" + ], + "text/plain": [ + " clone state state_fips SNAP\n", + "0 0 TX 48 $18,396.00\n", + "1 1 PA 42 $18,396.00\n", + "2 2 NY 36 $18,396.00" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "target_group = tracer.get_group_rows(2)\n", - "row_loc = target_group.iloc[28]['row_index'] # Manually found the index value 28\n", - "row_info = tracer.get_row_info(row_loc)\n", - "var = row_info['variable']\n", - "var_desc = row_info['variable_desc']\n", - "target_geo_id = int(row_info['geographic_id'])\n", + "print(f\"SNAP for record_idx={record_idx} across all {N_CLONES} clones:\\n\")\n", + "rows = []\n", + "for c in range(N_CLONES):\n", + " cs = geography.state_fips[c * n_records + record_idx]\n", + " s = Microsimulation(dataset=dataset_path)\n", + " s.set_input(\n", + " \"state_fips\",\n", + " 2024,\n", + " geography.state_fips[c * n_records : (c + 1) * n_records].astype(\n", + " np.int32\n", + " ),\n", + " )\n", + " for var in get_calculated_variables(s):\n", + " s.delete_arrays(var)\n", + " clone_snap = s.calculate(\"snap\", map_to=\"household\").values\n", + " rows.append(\n", + " {\n", + " \"clone\": c,\n", + " \"state\": STATE_CODES.get(int(cs), \"??\"),\n", + " \"state_fips\": int(cs),\n", + " \"SNAP\": f\"${clone_snap[record_idx]:,.2f}\",\n", + " }\n", + " )\n", + "pd.DataFrame(rows)" + ] + }, + { + "cell_type": "markdown", + "id": "cell-12", + "metadata": {}, + "source": [ + "`get_calculated_variables` is selective: it identifies variables with formulas (state-dependent computations) while preserving survey-reported inputs and entity IDs. This is what allows the same demographic household to produce different benefit amounts under different state rules." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cell-13", + "metadata": {}, + "source": [ + "## Section 4: Geographic Column Masking\n", "\n", - "print(\"Row info for North Carolina's SNAP benefit amount:\")\n", - "print(row_info)" + "When assembling the calibration matrix, each target row only \"sees\" columns (clones) whose geography matches the target's geography. This is implemented via `state_to_cols` and `cd_to_cols` dictionaries built from the `GeographyAssignment`.\n", + "\n", + "This is step 3 of `build_matrix` — reproduced here for transparency." ] }, { "cell_type": "code", "execution_count": 8, - "id": "c2be9721-ff11-4f78-ba0b-03407201dd53", + "id": "cell-14", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " household_id household_weight state_fips snap\n", - "0 26 1205.310059 23 0.0\n", - "1 34 2170.419922 23 0.0\n", - "2 38 587.510010 23 0.0\n", - "3 46 1010.840027 23 0.0\n", - "4 71 957.460022 23 0.0\n", - "... ... ... ... ...\n", - "11994 177822 0.000000 15 0.0\n", - "11995 177829 0.000000 15 0.0\n", - "11996 177831 0.000000 15 0.0\n", - "11997 177860 0.000000 15 6294.0\n", - "11998 177861 0.000000 15 0.0\n", + "Unique states mapped: 50\n", + "Unique CDs mapped: 435\n", "\n", - "[11999 rows x 4 columns]\n" + "Columns per state: min=62, median=494, max=4311\n" ] } ], "source": [ - "hh_snap_df = pd.DataFrame(sim.calculate_dataframe([\n", - " \"household_id\", \"household_weight\", \"state_fips\", \"snap\"]) \n", - ")\n", - "print(hh_snap_df)" - ] - }, - { - "cell_type": "markdown", - "id": "438828ac-df94-4d3e-a9a8-227bb6f64933", - "metadata": {}, - "source": [ - "If we were to include `congressional_district_geoid` above, they would all be zeros. 
It's not until we do the calibration, i.e., come back with a vector of weights `w` to multiply `X_sparse` with, that we will set `congressional_district_geoid`.\n", + "state_col_lists = defaultdict(list)\n", + "cd_col_lists = defaultdict(list)\n", + "for col in range(n_total):\n", + " state_col_lists[int(geography.state_fips[col])].append(col)\n", + " cd_col_lists[str(geography.cd_geoid[col])].append(col)\n", "\n", - "However, every household is already a donor to every contressional district. You can get the column positions for every household (remember targets are on the rows, donor households on the columns) by running tracer's get_household_column_positions with the *original* `household_id`." + "state_to_cols = {s: np.array(c) for s, c in state_col_lists.items()}\n", + "cd_to_cols = {cd: np.array(c) for cd, c in cd_col_lists.items()}\n", + "\n", + "print(f\"Unique states mapped: {len(state_to_cols)}\")\n", + "print(f\"Unique CDs mapped: {len(cd_to_cols)}\")\n", + "\n", + "state_counts = {s: len(c) for s, c in state_to_cols.items()}\n", + "sc_series = pd.Series(state_counts)\n", + "print(\n", + " f\"\\nColumns per state: min={sc_series.min()}, \"\n", + " f\"median={sc_series.median():.0f}, max={sc_series.max()}\"\n", + ")" ] }, { "cell_type": "code", "execution_count": 9, - "id": "cell-12", + "id": "cell-15", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " household_id household_weight state_fips snap\n", - "23 654 1550.660034 23 70.080002\n", + "Example household clone visibility:\n", "\n", - "Evaluating the tracer.get_household_column_positions dictionary:\n", + "Clone 0 (TX, CD 4817):\n", + " Visible to TX state targets: col 8629 in state_to_cols[48]? True\n", + " Visible to CD 4817 targets: col 8629 in cd_to_cols['4817']? 
True\n", + " Visible to NC (37) targets: False\n", "\n", - "{'1501': 23, '1502': 12022, '201': 24021, '3001': 36020, '3002': 48019, '3701': 60018, '3702': 72017, '3703': 84016, '3704': 96015, '3705': 108014, '3706': 120013, '3707': 132012, '3708': 144011, '3709': 156010, '3710': 168009, '3711': 180008, '3712': 192007, '3713': 204006, '3714': 216005}\n" + "Clone 1 (PA, CD 4201):\n", + " Visible to PA state targets: col 20628 in state_to_cols[42]? True\n", + " Visible to CD 4201 targets: col 20628 in cd_to_cols['4201']? True\n", + " Visible to NC (37) targets: False\n", + "\n", + "Clone 2 (NY, CD 3611):\n", + " Visible to NY state targets: col 32627 in state_to_cols[36]? True\n", + " Visible to CD 3611 targets: col 32627 in cd_to_cols['3611']? True\n", + " Visible to NC (37) targets: False\n", + "\n" ] } ], "source": [ - "# Reverse lookup: get all column positions for a specific household\n", - "hh_id = hh_snap_df.loc[hh_snap_df.snap > 0].household_id.values[0]\n", - "print(hh_snap_df.loc[hh_snap_df.household_id == hh_id])\n", - "\n", - "print(\"\\nEvaluating the tracer.get_household_column_positions dictionary:\\n\")\n", - "positions = tracer.get_household_column_positions(hh_id)\n", - "print(positions)" + "print(f\"Example household clone visibility:\\n\")\n", + "for c in range(N_CLONES):\n", + " col = c * n_records + record_idx\n", + " state = int(geography.state_fips[col])\n", + " cd = str(geography.cd_geoid[col])\n", + " abbr = STATE_CODES.get(state, \"??\")\n", + " print(f\"Clone {c} ({abbr}, CD {cd}):\")\n", + " print(\n", + " f\" Visible to {abbr} state targets: \"\n", + " f\"col {col} in state_to_cols[{state}]? \"\n", + " f\"{col in state_to_cols.get(state, [])}\"\n", + " )\n", + " print(\n", + " f\" Visible to CD {cd} targets: \"\n", + " f\"col {col} in cd_to_cols['{cd}']? 
\"\n", + " f\"{col in cd_to_cols.get(cd, [])}\"\n", + " )\n", + " # Check an unrelated state\n", + " print(\n", + " f\" Visible to NC (37) targets: \" f\"{col in state_to_cols.get(37, [])}\"\n", + " )\n", + " print()" ] }, { "cell_type": "markdown", - "id": "cell-13", + "id": "cell-16", + "metadata": {}, + "source": [ + "This is the mechanism behind the sparsity pattern in `calibration_matrix.ipynb`: a household clone assigned to TX can contribute to TX state targets and TX CD targets, but produces a zero entry for NC or AK targets. The matrix is sparse because each clone only intersects a small fraction of all geographic targets." + ] + }, + { + "cell_type": "markdown", + "id": "cell-17", "metadata": {}, "source": [ - "## Section 5: Understanding the cells of the X_Sparse matrix and Target vector" + "## Section 5: Takeup Re-randomization\n", + "\n", + "The base CPS has fixed takeup decisions (e.g., \"this household takes up SNAP\"). But when we clone a household into different census blocks, each block should have independently drawn takeup — otherwise every clone of a SNAP-participating household would still participate, regardless of geography.\n", + "\n", + "`rerandomize_takeup` solves this: for each census block, it uses `seeded_rng(variable_name, salt=block_geoid)` to draw new takeup booleans. The seed is deterministic per (variable, block) pair, so results are reproducible." ] }, { "cell_type": "code", "execution_count": 10, - "id": "e05aaeab-3786-4ff0-a50b-34577065d2e0", + "id": "cell-18", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Remember, this is a North Carolina target:\n", - "\n", - "target_id 8942\n", - "stratum_id 9363\n", - "variable snap\n", - "value 2934626410.0\n", - "period 2024\n", - "geo_level state\n", - "geographic_id 37\n", - "domain_variable snap\n", - "original_value 2934626410.0\n", - "uprating_factor 1.0\n", - "Name: 80, dtype: object\n", + "8 takeup variables:\n", "\n", - "NC State target. 
Household donated to NC's 2nd district, 2024 SNAP dollars:\n", - "70.08\n", - "\n", - "Same target, same household, donated to AK's at Large district, 2024 SNAP dollars:\n", - "0.0\n" + " takes_up_snap_if_eligible entity=spm_unit rate=82.00%\n", + " takes_up_aca_if_eligible entity=tax_unit rate=67.20%\n", + " takes_up_dc_ptc entity=tax_unit rate=32.00%\n", + " takes_up_head_start_if_eligible entity=person rate=30.00%\n", + " takes_up_early_head_start_if_eligible entity=person rate=9.00%\n", + " takes_up_ssi_if_eligible entity=person rate=50.00%\n", + " would_file_taxes_voluntarily entity=tax_unit rate=5.00%\n", + " takes_up_medicaid_if_eligible entity=person rate=dict (51 entries)\n" ] } ], "source": [ - "print(\"Remember, this is a North Carolina target:\\n\")\n", - "print(targets_df.iloc[row_loc])\n", - "\n", - "print(\"\\nNC State target. Household donated to NC's 2nd district, 2024 SNAP dollars:\")\n", - "print(X_sparse[row_loc, positions['3702']]) # Household donated to NC's 2nd district\n", - "\n", - "print(\"\\nSame target, same household, donated to AK's at Large district, 2024 SNAP dollars:\")\n", - "print(X_sparse[row_loc, positions['201']]) # Household donated to AK's at Large District" + "print(f\"{len(SIMPLE_TAKEUP_VARS)} takeup variables:\\n\")\n", + "for spec in SIMPLE_TAKEUP_VARS:\n", + " rate_key = spec[\"rate_key\"]\n", + " if rate_key == \"voluntary_filing\":\n", + " rate = 0.05\n", + " else:\n", + " rate = load_take_up_rate(rate_key, 2024)\n", + " rate_str = (\n", + " f\"{rate:.2%}\"\n", + " if isinstance(rate, float)\n", + " else f\"dict ({len(rate)} entries)\"\n", + " )\n", + " print(\n", + " f\" {spec['variable']:40s} \"\n", + " f\"entity={spec['entity']:10s} rate={rate_str}\"\n", + " )" ] }, { - "cell_type": "markdown", - "id": "cell-16", + "cell_type": "code", + "execution_count": 11, + "id": "cell-19", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Same block + same var (reproducible):\n", 
+ " [0.50514599 0.75213437 0.9703409 0.18048868 0.31969517]\n", + " [0.50514599 0.75213437 0.9703409 0.18048868 0.31969517]\n", + " Match: True\n", + "\n", + "Different block, same var:\n", + " [0.15503168 0.96707026 0.79019745 0.67544525 0.85245009]\n", + " Match: False\n", + "\n", + "Same block, different var:\n", + " [0.93155876 0.8912794 0.50838888 0.32192278 0.01005173]\n", + " Match: False\n" + ] + } + ], "source": [ - "Key property: For state-level targets, only CDs in that state should have non-zero values.\n", + "block_a = \"482011234567890\"\n", + "block_b = \"170311234567890\"\n", + "var = \"takes_up_snap_if_eligible\"\n", "\n", - "Example: A NC state SNAP target should have zeros for HI, MT, and AK CD columns.\n", + "rng_a1 = seeded_rng(var, salt=block_a)\n", + "rng_a2 = seeded_rng(var, salt=block_a)\n", + "rng_b = seeded_rng(var, salt=block_b)\n", + "rng_other = seeded_rng(\"takes_up_aca_if_eligible\", salt=block_a)\n", "\n", - "So let's see that same household's value for the Alaska state target:" + "draws_a1 = rng_a1.random(5)\n", + "draws_a2 = rng_a2.random(5)\n", + "draws_b = rng_b.random(5)\n", + "draws_other = rng_other.random(5)\n", + "\n", + "print(\"Same block + same var (reproducible):\")\n", + "print(f\" {draws_a1}\")\n", + "print(f\" {draws_a2}\")\n", + "print(f\" Match: {np.allclose(draws_a1, draws_a2)}\")\n", + "print(f\"\\nDifferent block, same var:\")\n", + "print(f\" {draws_b}\")\n", + "print(f\" Match: {np.allclose(draws_a1, draws_b)}\")\n", + "print(f\"\\nSame block, different var:\")\n", + "print(f\" {draws_other}\")\n", + "print(f\" Match: {np.allclose(draws_a1, draws_other)}\")" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "8cdc264c-8335-40eb-afd9-4c4d023ec303", + "execution_count": 12, + "id": "cell-20", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Row info for Alaska's SNAP benefit amount:\n", - "{'row_index': 80, 'variable': 'snap', 'variable_desc': 'SNAP allotment', 
'geographic_id': '37', 'target_value': 2934626410.0, 'stratum_id': 9363, 'domain_variable': 'snap'}\n" + "Takeup rates before/after re-randomization (clone 0):\n", + "\n", + " takes_up_snap_if_eligible before=82.333% after=82.381%\n", + " takes_up_aca_if_eligible before=66.718% after=67.486%\n", + " takes_up_dc_ptc before=31.483% after=32.044%\n", + " takes_up_head_start_if_eligible before=29.963% after=29.689%\n", + " takes_up_early_head_start_if_eligible before=8.869% after=8.721%\n", + " takes_up_ssi_if_eligible before=100.000% after=49.776%\n", + " would_file_taxes_voluntarily before=0.000% after=4.905%\n", + " takes_up_medicaid_if_eligible before=84.496% after=80.051%\n" ] } ], "source": [ - "target_group = tracer.get_group_rows(2)\n", - "new_row_loc = target_group.iloc[10]['row_index'] # Manually found the index value 10\n", - "row_info = tracer.get_row_info(row_loc)\n", - "var = row_info['variable']\n", - "var_desc = row_info['variable_desc']\n", - "target_geo_id = int(row_info['geographic_id'])\n", + "test_sim = Microsimulation(dataset=dataset_path)\n", + "clone_0_states = geography.state_fips[:n_records]\n", + "clone_0_blocks = geography.block_geoid[:n_records]\n", + "test_sim.set_input(\"state_fips\", 2024, clone_0_states.astype(np.int32))\n", + "\n", + "before = {}\n", + "for spec in SIMPLE_TAKEUP_VARS:\n", + " v = spec[\"variable\"]\n", + " vals = test_sim.calculate(v, map_to=spec[\"entity\"]).values\n", + " before[v] = vals.mean()\n", + "\n", + "rerandomize_takeup(test_sim, clone_0_blocks, clone_0_states, 2024)\n", "\n", - "print(\"Row info for Alaska's SNAP benefit amount:\")\n", - "print(row_info)" + "print(\"Takeup rates before/after re-randomization (clone 0):\\n\")\n", + "for spec in SIMPLE_TAKEUP_VARS:\n", + " v = spec[\"variable\"]\n", + " vals = test_sim.calculate(v, map_to=spec[\"entity\"]).values\n", + " after = vals.mean()\n", + " print(f\" {v:40s} before={before[v]:.3%} after={after:.3%}\")" ] }, { "cell_type": "code", - "execution_count": 
12, - "id": "ac59b6f1-859f-4246-8a05-8cb26384c882", + "execution_count": 13, + "id": "cell-21", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "Medicaid takeup rates (state-specific), first 10 states:\n", "\n", - "Household donated to AK's 1st district, 2024 SNAP dollars:\n", - "0.0\n" + " AK: 88.00%\n", + " AL: 92.00%\n", + " AR: 79.00%\n", + " AZ: 95.00%\n", + " CA: 78.00%\n", + " CO: 99.00%\n", + " CT: 89.00%\n", + " DC: 99.00%\n", + " DE: 86.00%\n", + " FL: 98.00%\n" ] } ], "source": [ - "print(\"\\nHousehold donated to AK's 1st district, 2024 SNAP dollars:\")\n", - "print(X_sparse[new_row_loc, positions['201']]) # Household donated to AK's at Large District" + "medicaid_rates = load_take_up_rate(\"medicaid\", 2024)\n", + "print(\"Medicaid takeup rates (state-specific), first 10 states:\\n\")\n", + "for state, rate in sorted(medicaid_rates.items())[:10]:\n", + " print(f\" {state}: {rate:.2%}\")" ] }, { "cell_type": "markdown", - "id": "cell-18", + "id": "cell-22", + "metadata": {}, + "source": [ + "In the full pipeline, `rerandomize_takeup` is passed to `build_matrix` as a `sim_modifier` callback. For each clone, after `state_fips` is set but before formula caches are cleared, the callback draws new takeup booleans per census block. This means the same household in block A might take up SNAP while in block B it doesn't — matching the statistical reality that takeup varies by geography." + ] + }, + { + "cell_type": "markdown", + "id": "cell-23", "metadata": {}, "source": [ - "## Section 6: Simulating State-Swapped Calculations\n", + "## Section 6: Matrix Build Verification\n", "\n", - "When a household is \"transplanted\" to a different state, state-dependent benefits like SNAP are recalculated under the destination state's rules." + "Let's run the full `build_matrix` pipeline and verify the example household's pattern matches our Section 4 predictions. 
We use the same `target_filter` as in `calibration_matrix.ipynb` but *without* `sim_modifier` to match that notebook's output." ] }, { "cell_type": "code", - "execution_count": 13, - "id": "cell-19", + "execution_count": 14, + "id": "cell-24", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2026-02-13 17:11:22,384 - INFO - Processing clone 1/3 (cols 0-11998, 50 unique states)...\n", + "2026-02-13 17:11:23,509 - INFO - Processing clone 2/3 (cols 11999-23997, 50 unique states)...\n", + "2026-02-13 17:11:24,645 - INFO - Processing clone 3/3 (cols 23998-35996, 50 unique states)...\n", + "2026-02-13 17:11:25,769 - INFO - Assembling matrix from 3 clones...\n", + "2026-02-13 17:11:25,771 - INFO - Matrix: 538 targets x 35997 cols, 14946 nnz\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "SNAP values for first 5 households under different state rules:\n", - " NC rules: [0. 0. 0. 0. 0.]\n", - " AK rules: [0. 0. 0. 0. 0.]\n", - " Difference: [0. 0. 0. 0. 
0.]\n" + "Matrix shape: (538, 35997)\n", + "Non-zero entries: 14,946\n", + "Density: 0.000772\n" ] } ], "source": [ - "def create_state_simulation(state_fips):\n", - " \"\"\"Create a simulation with all households assigned to a specific state.\"\"\"\n", - " s = Microsimulation(dataset=dataset_path)\n", - " s.set_input(\n", - " \"state_fips\", 2024, np.full(hh_snap_df.shape[0], state_fips, dtype=np.int32)\n", - " )\n", - " for var in get_calculated_variables(s):\n", - " s.delete_arrays(var)\n", - " return s\n", - "\n", - "# Compare SNAP for first 5 households under NC vs AK rules\n", - "nc_sim = create_state_simulation(37) # NC\n", - "ak_sim = create_state_simulation(2) # AK\n", + "builder = UnifiedMatrixBuilder(\n", + " db_uri=db_uri,\n", + " time_period=2024,\n", + " dataset_path=dataset_path,\n", + ")\n", "\n", - "nc_snap = nc_sim.calculate(\"snap\", map_to=\"household\").values[:5]\n", - "ak_snap = ak_sim.calculate(\"snap\", map_to=\"household\").values[:5]\n", + "targets_df, X_sparse, target_names = builder.build_matrix(\n", + " geography,\n", + " sim,\n", + " target_filter={\"domain_variables\": [\"snap\"]},\n", + ")\n", "\n", - "print(\"SNAP values for first 5 households under different state rules:\")\n", - "print(f\" NC rules: {nc_snap}\")\n", - "print(f\" AK rules: {ak_snap}\")\n", - "print(f\" Difference: {ak_snap - nc_snap}\")" + "print(f\"Matrix shape: {X_sparse.shape}\")\n", + "print(f\"Non-zero entries: {X_sparse.nnz:,}\")\n", + "print(f\"Density: {X_sparse.nnz / (X_sparse.shape[0] * X_sparse.shape[1]):.6f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cell-25", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Example household non-zero pattern across clones:\n", + "\n", + "Clone 0 (TX, CD 4817): 3 non-zero rows\n", + " row 39: household_count (geo=48): 1.00\n", + " row 90: snap (geo=48): 18396.00\n", + " row 410: household_count (geo=4817): 1.00\n", + "Clone 1 (PA, CD 
4201): 3 non-zero rows\n", + " row 34: household_count (geo=42): 1.00\n", + " row 85: snap (geo=42): 18396.00\n", + " row 358: household_count (geo=4201): 1.00\n", + "Clone 2 (NY, CD 3611): 3 non-zero rows\n", + " row 27: household_count (geo=36): 1.00\n", + " row 78: snap (geo=36): 18396.00\n", + " row 292: household_count (geo=3611): 1.00\n" + ] + } + ], + "source": [ + "print(f\"Example household non-zero pattern across clones:\\n\")\n", + "for c in range(N_CLONES):\n", + " col = c * n_records + record_idx\n", + " col_vec = X_sparse[:, col]\n", + " nz_rows = col_vec.nonzero()[0]\n", + " state = int(geography.state_fips[col])\n", + " cd = geography.cd_geoid[col]\n", + " abbr = STATE_CODES.get(state, \"??\")\n", + " print(f\"Clone {c} ({abbr}, CD {cd}): {len(nz_rows)} non-zero rows\")\n", + " for r in nz_rows:\n", + " row = targets_df.iloc[r]\n", + " print(\n", + " f\" row {r}: {row['variable']} \"\n", + " f\"(geo={row['geographic_id']}): \"\n", + " f\"{X_sparse[r, col]:.2f}\"\n", + " )" ] }, { "cell_type": "markdown", - "id": "a7a3b4f3-dabc-4160-a781-a529018e889f", + "id": "cell-26", "metadata": {}, "source": [ - "## Section 7: Creating the h5 files\n", - "\n", - " `w` (required)\n", - " - The calibrated weight vector from L0 calibration\n", - " - Shape: (n_cds * n_households,) — a flattened matrix where each CD has weights for all households\n", - " - Gets reshaped to (n_cds, n_households) internally\n", + "## Section 7: From Weights to Datasets\n", "\n", - " `cds_to_calibrate` (required)\n", - " - The ordered list of CD GEOIDs used when building w\n", - " - Serves two purposes:\n", - " a. Tells us how to reshape w (via its length)\n", - " b. Provides the index mapping so we can extract the right rows for any cd_subset\n", + "`create_sparse_cd_stacked_dataset` takes calibrated weights and builds an h5 file with only the non-zero-weight households, reindexed per CD. 
Internally it does its own state-swap simulation — loading the base dataset, assigning `state_fips` for the target CD's state, and recalculating benefits from scratch. This means SNAP values in the output reflect the destination state's rules (e.g., a $70 SNAP household from NC may get $0 under AK rules).\n", "\n", "**Format gap:** The calibration produces weights in clone layout `(n_records * n_clones,)` where each cloned record maps to one specific CD via the `GeographyAssignment`. The stacked dataset builder expects CD layout `(n_cds * n_households,)` where every CD has a weight slot for every household. Converting between these — accumulating clone weights into their assigned CDs — is a separate step not yet implemented. The demo below constructs artificial CD-layout weights directly to show how the builder works."
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cell-27", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dimension mismatch:\n", + " Calibration output: (11999 * 3,) = 35,997 (clone layout)\n", + " Stacked builder expects: (436 * 11999,) = 5,231,564 (CD layout)\n" + ] + } + ], + "source": [ + "print(\"Dimension mismatch:\")\n", + "print(\n", + " f\" Calibration output: ({n_records} * {N_CLONES},) \"\n", + " f\"= {n_records * N_CLONES:,} (clone layout)\"\n", + ")\n", "\n", - " `dataset_path` (optional, default None)\n", - " - Path to the base .h5 dataset that was used during calibration\n", - " - This is the \"template\" — household structure, demographics, etc.\n", - " - The function loads this, reweights households per CD, updates geography, and stacks" + "all_cds = get_all_cds_from_database(db_uri)\n", + "n_cds = len(all_cds)\n", + "print(\n", + " f\" Stacked builder expects: ({n_cds} * {n_records},) \"\n", + " f\"= {n_cds * n_records:,} (CD layout)\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "e1f8b237-ba42-4fca-8d43-f253f587d49b", + "execution_count": 17, + "id": "cell-28", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Weight vector: 23,998 entries (2 CDs x 11,999 HH)\n", + "Non-zero weights: 277\n", + "Example HH weight in CD 3701: 2.5\n", + "Example HH weight in CD 201: 3.5\n" + ] + } + ], "source": [ "import os\n", "\n", - "from policyengine_us_data.datasets.cps.local_area_calibration.stacked_dataset_builder import create_sparse_cd_stacked_dataset\n", + "demo_cds = [\"3701\", \"201\"]\n", + "n_demo_cds = len(demo_cds)\n", "\n", - "# Initialize the weights w for demonstration\n", - "# We can't allow too many w cells to be positive for a given state, or the reindexing will fail\n", - "w = np.random.binomial(n=1, p=0.01, size=X_sparse.shape[1]).astype(float)\n", + "w = (\n", + " 
np.random.default_rng(42)\n", + " .binomial(n=1, p=0.01, size=n_demo_cds * n_records)\n", + " .astype(float)\n", + ")\n", "\n", - "# We'll make sure our earlier household is included:\n", - "household_ids = sim.calculate(\"household_id\", map_to=\"household\").values\n", - "hh_idx = np.where(household_ids == hh_id)[0][0]\n", + "# Seed our example household into both CDs\n", + "cd_idx_3701 = demo_cds.index(\"3701\")\n", + "w[cd_idx_3701 * n_records + record_idx] = 2.5\n", "\n", - "cd_idx = test_cds.index('3701')\n", - "flat_idx = cd_idx * len(household_ids) + hh_idx\n", - "w[flat_idx] = 2.5\n", + "cd_idx_201 = demo_cds.index(\"201\")\n", + "w[cd_idx_201 * n_records + record_idx] = 3.5\n", "\n", - "cd_idx = test_cds.index('201')\n", - "flat_idx = cd_idx * len(household_ids) + hh_idx\n", - "w[flat_idx] = 3.5\n", + "output_dir = \"calibration_output\"\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "output_path = os.path.join(output_dir, \"results.h5\")\n", "\n", - "# Create a folder for the outputs of the function that is to come.\n", - "new_folder_name = \"calibration_output\"\n", - "os.makedirs(new_folder_name, exist_ok=True)\n", - "output_path = os.path.join(new_folder_name, \"results.h5\")" + "print(\n", + " f\"Weight vector: {len(w):,} entries \"\n", + " f\"({n_demo_cds} CDs x {n_records:,} HH)\"\n", + ")\n", + "print(f\"Non-zero weights: {(w > 0).sum()}\")\n", + "print(\n", + " f\"Example HH weight in CD 3701: {w[cd_idx_3701 * n_records + record_idx]}\"\n", + ")\n", + "print(f\"Example HH weight in CD 201: {w[cd_idx_201 * n_records + record_idx]}\")" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "650b807d-3d20-48e0-b512-43922ca2aace", + "execution_count": 18, + "id": "cell-29", "metadata": {}, "outputs": [ { @@ -643,27 +991,43 @@ "\n", "Original dataset has 11,999 households\n", "Extracted weights for 2 CDs from full weight matrix\n", - "Total active household-CD pairs: 230\n", - "Total weight in W matrix: 234\n", - "Processing CD 201 
(2/2)...\n", + "Total active household-CD pairs: 277\n", + "Total weight in W matrix: 281\n", + "Processing CD 201 (2/2)...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2026-02-13 17:11:40,873 - INFO - HTTP Request: GET https://huggingface.co/api/models/policyengine/policyengine-us-data \"HTTP/1.1 200 OK\"\n", + "2026-02-13 17:11:40,899 - INFO - HTTP Request: HEAD https://huggingface.co/policyengine/policyengine-us-data/resolve/main/enhanced_cps_2024.h5 \"HTTP/1.1 302 Found\"\n", + "Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n", + "2026-02-13 17:11:40,899 - WARNING - Warning: You are sending unauthenticated requests to the HF Hub. Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "\n", "Combining 2 CD DataFrames...\n", - "Total households across all CDs: 230\n", - "Combined DataFrame shape: (578, 222)\n", + "Total households across all CDs: 277\n", + "Combined DataFrame shape: (726, 222)\n", "\n", "Reindexing all entity IDs using 25k ranges per CD...\n", - " Created 230 unique households across 2 CDs\n", + " Created 277 unique households across 2 CDs\n", " Reindexing persons using 25k ranges...\n", " Reindexing tax units...\n", " Reindexing SPM units...\n", " Reindexing marital units...\n", " Reindexing families...\n", - " Final persons: 578\n", - " Final households: 230\n", - " Final tax units: 314\n", - " Final SPM units: 236\n", - " Final marital units: 461\n", - " Final families: 249\n", + " Final persons: 726\n", + " Final households: 277\n", + " Final tax units: 373\n", + " Final SPM units: 291\n", + " Final marital units: 586\n", + " Final families: 309\n", "\n", "Weights in combined_df AFTER reindexing:\n", " HH weight sum: 0.00M\n", @@ -671,8 +1035,8 @@ " Ratio: 1.00\n", "\n", "Overflow check:\n", - " Max person ID after reindexing: 
5,125,285\n", - " Max person ID × 100: 512,528,500\n", + " Max person ID after reindexing: 5,025,335\n", + " Max person ID × 100: 502,533,500\n", " int32 max: 2,147,483,647\n", " ✓ No overflow risk!\n", "\n", @@ -687,9 +1051,9 @@ "Household mapping saved to calibration_output/mappings/results_household_mapping.csv\n", "\n", "Verifying saved file...\n", - " Final households: 230\n", - " Final persons: 578\n", - " Total population (from household weights): 234\n" + " Final households: 277\n", + " Final persons: 726\n", + " Total population (from household weights): 281\n" ] }, { @@ -698,17 +1062,16 @@ "'calibration_output/results.h5'" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "cd_subset = ['3701', '201']\n", "create_sparse_cd_stacked_dataset(\n", " w,\n", - " test_cds, # cds_to_calibrate - Defines the structure of the weight vector w\n", - " cd_subset=cd_subset, # cd_subset - Specifies which CDs to actually include in the output dataset (optional, defaults to all).\n", + " demo_cds,\n", + " cd_subset=demo_cds,\n", " dataset_path=dataset_path,\n", " output_path=output_path,\n", ")" @@ -716,280 +1079,101 @@ }, { "cell_type": "code", - "execution_count": 16, - "id": "f8d449b4-6069-44e0-8d21-e73944a1a1d2", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[34mmappings\u001b[m\u001b[m/ results.h5\n" - ] - } - ], - "source": [ - "%ls calibration_output" - ] - }, - { - "cell_type": "markdown", - "id": "04d7b733-bec5-49cb-9272-d167ae9c4693", - "metadata": {}, - "source": [ - "Note that there is a *mappings* directory that has also been created by create_sparse_cd_stacked_dataset. This contains the CSV file that links the original households to the donor households. The reason it's a seperate folder is to keep the h5 files and the mapping CSVs organized when this function is run for all districts or states." 
- ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "5fd7f7cc-6517-4f39-9a14-9cb147af38e7", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "results_household_mapping.csv\n" - ] - } - ], - "source": [ - "%ls calibration_output/mappings" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "578e8a69-b7ec-46bf-82ec-8020a46fd9cf", + "execution_count": 19, + "id": "cell-30", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - " household_id congressional_district_geoid \\\n", - "0 50000 201 \n", - "1 50001 201 \n", - "2 50002 201 \n", - "3 50003 201 \n", - "4 50004 201 \n", - ".. ... ... \n", - "225 125113 3701 \n", - "226 125114 3701 \n", - "227 125115 3701 \n", - "228 125116 3701 \n", - "229 125117 3701 \n", + "Stacked dataset: 277 households\n", "\n", - " county household_weight state_fips \\\n", - "0 NORTH_SLOPE_BOROUGH_AK 3.5 2 \n", - "1 ALEUTIANS_WEST_CENSUS_AREA_AK 1.0 2 \n", - "2 FAIRBANKS_NORTH_STAR_BOROUGH_AK 1.0 2 \n", - "3 KENAI_PENINSULA_BOROUGH_AK 1.0 2 \n", - "4 HOONAH_ANGOON_CENSUS_AREA_AK 1.0 2 \n", - ".. ... ... ... \n", - "225 TYRRELL_COUNTY_NC 1.0 37 \n", - "226 WILSON_COUNTY_NC 1.0 37 \n", - "227 WARREN_COUNTY_NC 1.0 37 \n", - "228 WILSON_COUNTY_NC 1.0 37 \n", - "229 GREENE_COUNTY_NC 1.0 37 \n", + "Example household (original_id=128694) in mapping:\n", "\n", - " snap \n", - "0 0.000000 \n", - "1 0.000000 \n", - "2 0.000000 \n", - "3 0.000000 \n", - "4 0.000000 \n", - ".. ... 
\n", - "225 0.000000 \n", - "226 3438.300293 \n", - "227 0.000000 \n", - "228 0.000000 \n", - "229 885.599792 \n", + " new_household_id original_household_id congressional_district state_fips\n", + " 108 128694 201 2\n", + " 25097 128694 3701 37\n", "\n", - "[230 rows x 6 columns]\n" + "In stacked dataset:\n", + "\n", + " household_id congressional_district_geoid household_weight state_fips snap\n", + " 108 201 3.5 2 23640.0\n", + " 25097 3701 2.5 37 18396.0\n" ] } ], "source": [ - "sim_after = Microsimulation(dataset=\"./calibration_output/results.h5\")\n", + "sim_after = Microsimulation(dataset=f\"./{output_path}\")\n", + "hh_after_df = pd.DataFrame(\n", + " sim_after.calculate_dataframe(\n", + " [\n", + " \"household_id\",\n", + " \"congressional_district_geoid\",\n", + " \"household_weight\",\n", + " \"state_fips\",\n", + " \"snap\",\n", + " ]\n", + " )\n", + ")\n", + "print(f\"Stacked dataset: {len(hh_after_df)} households\\n\")\n", "\n", - "hh_after_df = pd.DataFrame(sim_after.calculate_dataframe([\n", - " \"household_id\", \"congressional_district_geoid\", \"county\", \"household_weight\", \"state_fips\", \"snap\"]) \n", + "mapping_df = pd.read_csv(\n", + " f\"{output_dir}/mappings/results_household_mapping.csv\"\n", ")\n", - "print(hh_after_df)" - ] - }, - { - "cell_type": "markdown", - "id": "83769d86-91e1-41bb-b718-01ee09cc7e2a", - "metadata": {}, - "source": [ - "We can see one of the correct instances above but let's confirm that this new household id does in fact link back to the original in both cases." - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "27baf521-1bd6-4ef0-9f70-4381fd842b52", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
new_household_idoriginal_household_idcongressional_districtstate_fips
0500006542012
1125000654370137
\n", - "
" - ], - "text/plain": [ - " new_household_id original_household_id congressional_district state_fips\n", - "0 50000 654 201 2\n", - "1 125000 654 3701 37" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "mapping_df = pd.read_csv(\"calibration_output/mappings/results_household_mapping.csv\")\n", - "mapping_df.loc[mapping_df.original_household_id == hh_id]" + "example_mapping = mapping_df.loc[\n", + " mapping_df.original_household_id == example_hh_id\n", + "]\n", + "print(f\"Example household (original_id={example_hh_id}) \" f\"in mapping:\\n\")\n", + "print(example_mapping.to_string(index=False))\n", + "\n", + "new_ids = example_mapping.new_household_id\n", + "print(f\"\\nIn stacked dataset:\\n\")\n", + "print(\n", + " hh_after_df.loc[hh_after_df.household_id.isin(new_ids)].to_string(\n", + " index=False\n", + " )\n", + ")" ] }, { "cell_type": "code", "execution_count": 20, - "id": "36be0858-33f4-4c65-a74f-e18a76ce8eea", + "id": "cell-31", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
household_idcongressional_district_geoidcountyhousehold_weightstate_fipssnap
050000201NORTH_SLOPE_BOROUGH_AK3.520.000000
1121250003701HALIFAX_COUNTY_NC2.53770.080002
\n", - "
" - ], - "text/plain": [ - " household_id congressional_district_geoid county \\\n", - "0 50000 201 NORTH_SLOPE_BOROUGH_AK \n", - "112 125000 3701 HALIFAX_COUNTY_NC \n", - "\n", - " household_weight state_fips snap \n", - "0 3.5 2 0.000000 \n", - "112 2.5 37 70.080002 " - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Cleaned up calibration_output/\n" + ] } ], "source": [ - "new_hh_ids = mapping_df.loc[mapping_df.original_household_id == hh_id].new_household_id\n", - "hh_after_df.loc[hh_after_df.household_id.isin(new_hh_ids)]" + "import shutil\n", + "\n", + "shutil.rmtree(output_dir)\n", + "print(f\"Cleaned up {output_dir}/\")" ] }, { "cell_type": "markdown", - "id": "96fa8407-008f-4eaa-8f22-a803b72e71e4", + "id": "cell-32", "metadata": {}, "source": [ - "And we can see that the snap numbers still match their values from the different US state systems. However note that due to the use of policyengine-core's random function in a component of snap_gross_income, for some households, the value in the final simulation will not match the one used in creating the X matrix (`X_sparse` here). This is outlined in [Issue 412](https://github.com/PolicyEngine/policyengine-core/issues/412)." - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "90ee3a8b-d529-41f2-83ee-d543c53b5492", - "metadata": {}, - "outputs": [], - "source": [ - "%rm -r calibration_output" + "## Summary\n", + "\n", + "The clone-based calibration pipeline has six stages:\n", + "\n", + "1. **Clone + assign geography** — `assign_random_geography()` creates N copies of each CPS record, each with a population-weighted random census block.\n", + "2. **Simulate** — `_simulate_clone()` sets each clone's `state_fips` and recalculates state-dependent benefits.\n", + "3. **Geographic masking** — `state_to_cols` / `cd_to_cols` restrict each target row to geographically relevant columns.\n", + "4. 
**Re-randomize takeup** — `rerandomize_takeup()` draws new takeup per census block, breaking the fixed-takeup assumption.\n", + "5. **Build matrix** — `UnifiedMatrixBuilder.build_matrix()` assembles the sparse CSR matrix from all clones.\n", + "6. **Stacked datasets** — `create_sparse_cd_stacked_dataset()` converts calibrated weights into CD-level h5 files.\n", + "\n", + "For matrix diagnostics (row/column anatomy, target groups, sparsity analysis), see [calibration_matrix.ipynb](calibration_matrix.ipynb)." ] } ], diff --git a/modal_app/remote_calibration_runner.py b/modal_app/remote_calibration_runner.py index 43e354456..689d245dd 100644 --- a/modal_app/remote_calibration_runner.py +++ b/modal_app/remote_calibration_runner.py @@ -53,10 +53,7 @@ def _fit_weights_impl(branch: str, epochs: int) -> dict: elif line.startswith("DATASET:"): dataset_path = line.split("DATASET:")[1].strip() - script_path = ( - "policyengine_us_data/datasets/cps/" - "local_area_calibration/fit_calibration_weights.py" - ) + script_path = "policyengine_us_data/calibration/unified_calibration.py" result = subprocess.run( [ "uv", @@ -69,7 +66,7 @@ def _fit_weights_impl(branch: str, epochs: int) -> dict: str(epochs), "--db-path", db_path, - "--dataset-path", + "--dataset", dataset_path, ], capture_output=True, diff --git a/policyengine_us_data/calibration/__init__.py b/policyengine_us_data/calibration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/policyengine_us_data/calibration/clone_and_assign.py b/policyengine_us_data/calibration/clone_and_assign.py new file mode 100644 index 000000000..9aa64cbbc --- /dev/null +++ b/policyengine_us_data/calibration/clone_and_assign.py @@ -0,0 +1,145 @@ +"""Clone CPS records and assign random geography.""" + +import logging +from functools import lru_cache +from dataclasses import dataclass + +import numpy as np +import pandas as pd + +from policyengine_us_data.storage import STORAGE_FOLDER + +logger = logging.getLogger(__name__) + + 
+@dataclass +class GeographyAssignment: + """Random geography assignment for cloned CPS records. + + All arrays have length n_records * n_clones. + Index i corresponds to clone i // n_records, + record i % n_records. + """ + + block_geoid: np.ndarray # str array, 15-char block GEOIDs + cd_geoid: np.ndarray # str array of CD GEOIDs + state_fips: np.ndarray # int array of 2-digit state FIPS + n_records: int + n_clones: int + + +@lru_cache(maxsize=1) +def load_global_block_distribution(): + """Load block_cd_distributions.csv.gz and build + global distribution. + + Returns: + Tuple of (block_geoids, cd_geoids, state_fips, + probabilities) where each is a numpy array indexed + by block row. Probabilities are normalized to sum + to 1 globally. + + Raises: + FileNotFoundError: If the CSV file does not exist. + """ + csv_path = STORAGE_FOLDER / "block_cd_distributions.csv.gz" + if not csv_path.exists(): + raise FileNotFoundError( + f"{csv_path} not found. " + "Run make_block_cd_distributions.py to generate." + ) + + df = pd.read_csv(csv_path, dtype={"block_geoid": str}) + + block_geoids = df["block_geoid"].values + cd_geoids = df["cd_geoid"].astype(str).values + state_fips = np.array([int(b[:2]) for b in block_geoids]) + + probs = df["probability"].values.astype(np.float64) + probs = probs / probs.sum() + + return block_geoids, cd_geoids, state_fips, probs + + +def assign_random_geography( + n_records: int, + n_clones: int = 10, + seed: int = 42, +) -> GeographyAssignment: + """Assign random census block geography to cloned + CPS records. + + Each of n_records * n_clones total records gets a + random census block sampled from the global + population-weighted distribution. State and CD are + derived from the block GEOID. + + Args: + n_records: Number of households in the base CPS + dataset. + n_clones: Number of clones (default 10). + seed: Random seed for reproducibility. + + Returns: + GeographyAssignment with arrays of length + n_records * n_clones. 
+ """ + blocks, cds, states, probs = load_global_block_distribution() + + n_total = n_records * n_clones + rng = np.random.default_rng(seed) + indices = rng.choice(len(blocks), size=n_total, p=probs) + + return GeographyAssignment( + block_geoid=blocks[indices], + cd_geoid=cds[indices], + state_fips=states[indices], + n_records=n_records, + n_clones=n_clones, + ) + + +def double_geography_for_puf( + geography: GeographyAssignment, +) -> GeographyAssignment: + """Double geography arrays for PUF clone step. + + After PUF cloning doubles the base records, the geography + assignment must also double: each record and its PUF copy + share the same geographic assignment. + + The output has n_records = 2 * geography.n_records, with + the first half being the CPS records and the second half + being the PUF copies. + + Args: + geography: Original geography assignment. + + Returns: + New GeographyAssignment with doubled n_records. + """ + n_old = geography.n_records + n_new = n_old * 2 + n_clones = geography.n_clones + + new_blocks = [] + new_cds = [] + new_states = [] + + for c in range(n_clones): + start = c * n_old + end = start + n_old + clone_blocks = geography.block_geoid[start:end] + clone_cds = geography.cd_geoid[start:end] + clone_states = geography.state_fips[start:end] + new_blocks.append(np.concatenate([clone_blocks, clone_blocks])) + new_cds.append(np.concatenate([clone_cds, clone_cds])) + new_states.append(np.concatenate([clone_states, clone_states])) + + return GeographyAssignment( + block_geoid=np.concatenate(new_blocks), + cd_geoid=np.concatenate(new_cds), + state_fips=np.concatenate(new_states), + n_records=n_new, + n_clones=n_clones, + ) diff --git a/policyengine_us_data/calibration/unified_calibration.py b/policyengine_us_data/calibration/unified_calibration.py new file mode 100644 index 000000000..d2759b34b --- /dev/null +++ b/policyengine_us_data/calibration/unified_calibration.py @@ -0,0 +1,637 @@ +""" +Unified L0 calibration pipeline. 
+ +Pipeline flow: + 1. Load CPS dataset -> get n_records + 2. Clone Nx, assign random geography (census block) + 3. Re-randomize simple takeup variables per block + 4. Build sparse calibration matrix (clone-by-clone) + 5. L0-regularized optimization -> calibrated weights + 6. Save weights, diagnostics, run config + +Two presets control output size via L0 regularization: +- local: L0=1e-8, ~3-4M records (for local area dataset) +- national: L0=1e-4, ~50K records (for web app) + +Usage: + python -m policyengine_us_data.calibration.unified_calibration \\ + --dataset path/to/cps_2024.h5 \\ + --db-path path/to/policy_data.db \\ + --output path/to/weights.npy \\ + --preset local \\ + --epochs 100 +""" + +import argparse +import builtins +import logging +import sys +from pathlib import Path + +import numpy as np + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + stream=sys.stderr, +) +logger = logging.getLogger(__name__) + +PRESETS = { + "local": 1e-8, + "national": 1e-4, +} + +BETA = 0.35 +GAMMA = -0.1 +ZETA = 1.1 +INIT_KEEP_PROB = 0.999 +LOG_WEIGHT_JITTER_SD = 0.05 +LOG_ALPHA_JITTER_SD = 0.01 +LAMBDA_L2 = 1e-12 +LEARNING_RATE = 0.15 +DEFAULT_EPOCHS = 100 +DEFAULT_N_CLONES = 10 + +SIMPLE_TAKEUP_VARS = [ + { + "variable": "takes_up_snap_if_eligible", + "entity": "spm_unit", + "rate_key": "snap", + }, + { + "variable": "takes_up_aca_if_eligible", + "entity": "tax_unit", + "rate_key": "aca", + }, + { + "variable": "takes_up_dc_ptc", + "entity": "tax_unit", + "rate_key": "dc_ptc", + }, + { + "variable": "takes_up_head_start_if_eligible", + "entity": "person", + "rate_key": "head_start", + }, + { + "variable": "takes_up_early_head_start_if_eligible", + "entity": "person", + "rate_key": "early_head_start", + }, + { + "variable": "takes_up_ssi_if_eligible", + "entity": "person", + "rate_key": "ssi", + }, + { + "variable": "would_file_taxes_voluntarily", + "entity": "tax_unit", + "rate_key": "voluntary_filing", + }, + { + 
"variable": "takes_up_medicaid_if_eligible", + "entity": "person", + "rate_key": "medicaid", + }, +] + + +def rerandomize_takeup( + sim, + clone_block_geoids: np.ndarray, + clone_state_fips: np.ndarray, + time_period: int, +) -> None: + """Re-randomize simple takeup variables per census block. + + Groups entities by their household's block GEOID and draws + new takeup booleans using seeded_rng(var_name, salt=block). + Overrides the simulation's stored inputs. + + Args: + sim: Microsimulation instance (already has state_fips). + clone_block_geoids: Block GEOIDs per household. + clone_state_fips: State FIPS per household. + time_period: Tax year. + """ + from policyengine_us_data.parameters import ( + load_take_up_rate, + ) + from policyengine_us_data.utils.randomness import ( + seeded_rng, + ) + + n_households = len(clone_block_geoids) + hh_ids = sim.calculate("household_id", map_to="household").values + hh_to_block = dict(zip(hh_ids, clone_block_geoids)) + hh_to_state = dict(zip(hh_ids, clone_state_fips)) + + for spec in SIMPLE_TAKEUP_VARS: + var_name = spec["variable"] + entity_level = spec["entity"] + rate_key = spec["rate_key"] + + rate_or_dict = load_take_up_rate(rate_key, time_period) + + is_state_specific = isinstance(rate_or_dict, dict) + + entity_ids = sim.calculate( + f"{entity_level}_id", map_to=entity_level + ).values + entity_hh_ids = sim.calculate( + "household_id", map_to=entity_level + ).values + n_entities = len(entity_ids) + + draws = np.zeros(n_entities, dtype=np.float64) + rates = np.zeros(n_entities, dtype=np.float64) + + entity_blocks = np.array( + [hh_to_block.get(hid, "0") for hid in entity_hh_ids] + ) + + unique_blocks = np.unique(entity_blocks) + for block in unique_blocks: + mask = entity_blocks == block + n_in_block = mask.sum() + rng = seeded_rng(var_name, salt=str(block)) + draws[mask] = rng.random(n_in_block) + + if is_state_specific: + block_hh_ids = entity_hh_ids[mask] + for i, hid in enumerate(block_hh_ids): + state = 
int(hh_to_state.get(hid, 0)) + state_str = str(state) + r = rate_or_dict.get( + state_str, + rate_or_dict.get(state, 0.8), + ) + idx = np.where(mask)[0][i] + rates[idx] = r + else: + rates[mask] = rate_or_dict + + new_values = draws < rates + sim.set_input(var_name, time_period, new_values) + + +def parse_args(argv=None): + parser = argparse.ArgumentParser( + description="Unified L0 calibration pipeline" + ) + parser.add_argument( + "--dataset", + default=None, + help="Path to CPS h5 file", + ) + parser.add_argument( + "--db-path", + default=None, + help="Path to policy_data.db", + ) + parser.add_argument( + "--output", + default=None, + help="Path to save weights (.npy)", + ) + parser.add_argument( + "--n-clones", + type=int, + default=DEFAULT_N_CLONES, + help=f"Number of clones (default: {DEFAULT_N_CLONES})", + ) + parser.add_argument( + "--preset", + choices=list(PRESETS.keys()), + default=None, + help="L0 preset: local or national", + ) + parser.add_argument( + "--lambda-l0", + type=float, + default=None, + help="Custom L0 penalty (overrides preset)", + ) + parser.add_argument( + "--epochs", + type=int, + default=DEFAULT_EPOCHS, + help=f"Training epochs (default: {DEFAULT_EPOCHS})", + ) + parser.add_argument( + "--device", + default="cpu", + choices=["cpu", "cuda"], + help="Device for training", + ) + parser.add_argument( + "--seed", + type=int, + default=42, + help="Random seed for geography assignment", + ) + parser.add_argument( + "--domain-variables", + type=str, + default=None, + help=( + "Comma-separated domain variables for " "target_overview filtering" + ), + ) + parser.add_argument( + "--hierarchical-domains", + type=str, + default=None, + help=( + "Comma-separated domains for hierarchical " + "uprating + CD reconciliation" + ), + ) + parser.add_argument( + "--skip-takeup-rerandomize", + action="store_true", + help="Skip takeup re-randomization", + ) + return parser.parse_args(argv) + + +def fit_l0_weights( + X_sparse, + targets: np.ndarray, + 
lambda_l0: float, + epochs: int = DEFAULT_EPOCHS, + device: str = "cpu", + verbose_freq: int = None, +) -> np.ndarray: + """Fit L0-regularized calibration weights. + + Args: + X_sparse: Sparse matrix (targets x records). + targets: Target values array. + lambda_l0: L0 regularization strength. + epochs: Training epochs. + device: Torch device. + verbose_freq: Print frequency. Defaults to 10%. + + Returns: + Weight array of shape (n_records,). + """ + import time + + try: + from l0.calibration import SparseCalibrationWeights + except ImportError: + raise ImportError( + "l0-python required. " "Install: pip install l0-python" + ) + + import torch + + n_total = X_sparse.shape[1] + initial_weights = np.ones(n_total) * 100 + + logger.info( + "L0 calibration: %d targets, %d features, " + "lambda_l0=%.1e, epochs=%d", + X_sparse.shape[0], + n_total, + lambda_l0, + epochs, + ) + + model = SparseCalibrationWeights( + n_features=n_total, + beta=BETA, + gamma=GAMMA, + zeta=ZETA, + init_keep_prob=INIT_KEEP_PROB, + init_weights=initial_weights, + log_weight_jitter_sd=LOG_WEIGHT_JITTER_SD, + log_alpha_jitter_sd=LOG_ALPHA_JITTER_SD, + device=device, + ) + + if verbose_freq is None: + verbose_freq = max(1, epochs // 10) + + _builtin_print = builtins.print + + def _flushed_print(*args, **kwargs): + _builtin_print(*args, **kwargs) + sys.stdout.flush() + + builtins.print = _flushed_print + + t0 = time.time() + try: + model.fit( + M=X_sparse, + y=targets, + target_groups=None, + lambda_l0=lambda_l0, + lambda_l2=LAMBDA_L2, + lr=LEARNING_RATE, + epochs=epochs, + loss_type="relative", + verbose=True, + verbose_freq=verbose_freq, + ) + finally: + builtins.print = _builtin_print + + elapsed = time.time() - t0 + logger.info( + "L0 done in %.1f min (%.1f sec/epoch)", + elapsed / 60, + elapsed / epochs, + ) + + with torch.no_grad(): + weights = model.get_weights(deterministic=True).cpu().numpy() + + n_nz = (weights > 0).sum() + logger.info( + "Non-zero: %d / %d (%.1f%% sparsity)", + n_nz, + 
n_total, + (1 - n_nz / n_total) * 100, + ) + return weights + + +def compute_diagnostics( + weights: np.ndarray, + X_sparse, + targets_df, + target_names: list, +) -> "pd.DataFrame": + import pandas as pd + + estimates = X_sparse.dot(weights) + true_values = targets_df["value"].values + row_sums = np.array(X_sparse.sum(axis=1)).flatten() + + rel_errors = np.where( + np.abs(true_values) > 0, + (estimates - true_values) / np.abs(true_values), + 0.0, + ) + return pd.DataFrame( + { + "target": target_names, + "true_value": true_values, + "estimate": estimates, + "rel_error": rel_errors, + "abs_rel_error": np.abs(rel_errors), + "achievable": row_sums > 0, + } + ) + + +def run_calibration( + dataset_path: str, + db_path: str, + n_clones: int = DEFAULT_N_CLONES, + lambda_l0: float = 1e-8, + epochs: int = DEFAULT_EPOCHS, + device: str = "cpu", + seed: int = 42, + domain_variables: list = None, + hierarchical_domains: list = None, + skip_takeup_rerandomize: bool = False, +): + """Run unified calibration pipeline. + + Args: + dataset_path: Path to CPS h5 file. + db_path: Path to policy_data.db. + n_clones: Number of dataset clones. + lambda_l0: L0 regularization strength. + epochs: Training epochs. + device: Torch device. + seed: Random seed. + domain_variables: Filter targets by domain variable. + hierarchical_domains: Domains for hierarchical + uprating + CD reconciliation. + skip_takeup_rerandomize: Skip takeup step. 
+ + Returns: + (weights, targets_df, X_sparse, target_names) + """ + import time + + from policyengine_us import Microsimulation + + from policyengine_us_data.calibration.clone_and_assign import ( + assign_random_geography, + ) + from policyengine_us_data.calibration.unified_matrix_builder import ( + UnifiedMatrixBuilder, + ) + + t0 = time.time() + + # Step 1: Load dataset + logger.info("Loading dataset from %s", dataset_path) + sim = Microsimulation(dataset=dataset_path) + n_records = len(sim.calculate("household_id", map_to="household").values) + logger.info("Loaded %d households", n_records) + + # Step 2: Clone and assign geography + logger.info( + "Assigning geography: %d x %d = %d total", + n_records, + n_clones, + n_records * n_clones, + ) + geography = assign_random_geography( + n_records=n_records, + n_clones=n_clones, + seed=seed, + ) + + # Step 3: Build sim_modifier for takeup rerandomization + sim_modifier = None + if not skip_takeup_rerandomize: + time_period = 2024 + + def sim_modifier(s, clone_idx): + col_start = clone_idx * n_records + col_end = col_start + n_records + blocks = geography.block_geoid[col_start:col_end] + states = geography.state_fips[col_start:col_end] + rerandomize_takeup(s, blocks, states, time_period) + + # Step 4: Build target filter + target_filter = {} + if domain_variables: + target_filter["domain_variables"] = domain_variables + + # Step 5: Build sparse calibration matrix + t_matrix = time.time() + db_uri = f"sqlite:///{db_path}" + builder = UnifiedMatrixBuilder( + db_uri=db_uri, + time_period=2024, + dataset_path=dataset_path, + ) + targets_df, X_sparse, target_names = builder.build_matrix( + geography=geography, + sim=sim, + target_filter=target_filter, + hierarchical_domains=hierarchical_domains, + sim_modifier=sim_modifier, + ) + + builder.print_uprating_summary(targets_df) + logger.info( + "Matrix built in %.1f min", + (time.time() - t_matrix) / 60, + ) + logger.info( + "Matrix shape: %s, nnz: %d", + X_sparse.shape, + 
X_sparse.nnz, + ) + + # Step 6: L0 calibration + targets = targets_df["value"].values + + row_sums = np.array(X_sparse.sum(axis=1)).flatten() + achievable = row_sums > 0 + logger.info( + "Achievable: %d / %d targets", + achievable.sum(), + len(achievable), + ) + + weights = fit_l0_weights( + X_sparse=X_sparse, + targets=targets, + lambda_l0=lambda_l0, + epochs=epochs, + device=device, + ) + + logger.info( + "Total pipeline: %.1f min", + (time.time() - t0) / 60, + ) + return weights, targets_df, X_sparse, target_names + + +def main(argv=None): + import json + import time + + import pandas as pd + + try: + if not sys.stderr.isatty(): + sys.stderr.reconfigure(line_buffering=True) + if not sys.stdout.isatty(): + sys.stdout.reconfigure(line_buffering=True) + except AttributeError: + pass + + args = parse_args(argv) + + from policyengine_us_data.storage import STORAGE_FOLDER + + dataset_path = args.dataset or str( + STORAGE_FOLDER / "stratified_extended_cps_2024.h5" + ) + db_path = args.db_path or str( + STORAGE_FOLDER / "calibration" / "policy_data.db" + ) + output_path = args.output or str( + STORAGE_FOLDER / "calibration" / "unified_weights.npy" + ) + + if args.lambda_l0 is not None: + lambda_l0 = args.lambda_l0 + elif args.preset is not None: + lambda_l0 = PRESETS[args.preset] + else: + lambda_l0 = PRESETS["local"] + + domain_variables = None + if args.domain_variables: + domain_variables = [ + x.strip() for x in args.domain_variables.split(",") + ] + + hierarchical_domains = None + if args.hierarchical_domains: + hierarchical_domains = [ + x.strip() for x in args.hierarchical_domains.split(",") + ] + + t_start = time.time() + + weights, targets_df, X_sparse, target_names = run_calibration( + dataset_path=dataset_path, + db_path=db_path, + n_clones=args.n_clones, + lambda_l0=lambda_l0, + epochs=args.epochs, + device=args.device, + seed=args.seed, + domain_variables=domain_variables, + hierarchical_domains=hierarchical_domains, + 
skip_takeup_rerandomize=(args.skip_takeup_rerandomize), + ) + + # Save weights + np.save(output_path, weights) + logger.info("Weights saved to %s", output_path) + print(f"OUTPUT_PATH:{output_path}") + + # Save diagnostics + output_dir = Path(output_path).parent + diag_df = compute_diagnostics(weights, X_sparse, targets_df, target_names) + diag_path = output_dir / "unified_diagnostics.csv" + diag_df.to_csv(diag_path, index=False) + + ach = diag_df[diag_df.achievable] + err_pct = ach.abs_rel_error * 100 + logger.info( + "Diagnostics: %d targets, " + "mean=%.1f%%, median=%.1f%%, " + "<10%%=%.1f%%, <25%%=%.1f%%", + len(ach), + err_pct.mean(), + err_pct.median(), + (err_pct < 10).mean() * 100, + (err_pct < 25).mean() * 100, + ) + + # Save run config + t_end = time.time() + run_config = { + "dataset": dataset_path, + "db_path": db_path, + "n_clones": args.n_clones, + "lambda_l0": lambda_l0, + "epochs": args.epochs, + "device": args.device, + "seed": args.seed, + "domain_variables": domain_variables, + "hierarchical_domains": hierarchical_domains, + "n_targets": len(targets_df), + "n_records": X_sparse.shape[1], + "weight_sum": float(weights.sum()), + "weight_nonzero": int((weights > 0).sum()), + "mean_error_pct": float(err_pct.mean()), + "elapsed_seconds": round(t_end - t_start, 1), + } + config_path = output_dir / "unified_run_config.json" + with open(config_path, "w") as f: + json.dump(run_config, f, indent=2) + logger.info("Config saved to %s", config_path) + print(f"LOG_PATH:{diag_path}") + + +if __name__ == "__main__": + main() diff --git a/policyengine_us_data/calibration/unified_matrix_builder.py b/policyengine_us_data/calibration/unified_matrix_builder.py new file mode 100644 index 000000000..ac31c34e1 --- /dev/null +++ b/policyengine_us_data/calibration/unified_matrix_builder.py @@ -0,0 +1,906 @@ +""" +Unified sparse matrix builder for clone-based calibration. + +Builds a sparse calibration matrix for cloned+geography-assigned CPS +records. 
Processes clone-by-clone: for each clone, sets each +record's state_fips to its assigned value, simulates, and extracts +variable values. + +Matrix shape: (n_targets, n_records * n_clones) +Column ordering: index i = clone_idx * n_records + record_idx +""" + +import logging +from collections import defaultdict +from typing import Dict, List, Optional, Tuple + +import numpy as np +import pandas as pd +from scipy import sparse +from sqlalchemy import create_engine, text + +from policyengine_us_data.storage import STORAGE_FOLDER +from policyengine_us_data.utils.census import STATE_NAME_TO_FIPS +from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( + get_calculated_variables, + apply_op, + get_geo_level, +) + +logger = logging.getLogger(__name__) + +_GEO_VARS = { + "state_fips", + "state_code", + "congressional_district_geoid", +} + + +class UnifiedMatrixBuilder: + """Build sparse calibration matrix for cloned CPS records. + + Processes clone-by-clone: each clone's records get their + assigned geography, are simulated, and the results fill + the corresponding columns. + + Args: + db_uri: SQLAlchemy database URI. + time_period: Tax year for calibration (e.g. 2024). + dataset_path: Path to the base extended CPS h5 file. 
+ """ + + def __init__( + self, + db_uri: str, + time_period: int, + dataset_path: Optional[str] = None, + ): + self.db_uri = db_uri + self.engine = create_engine(db_uri) + self.time_period = time_period + self.dataset_path = dataset_path + self._entity_rel_cache = None + + # --------------------------------------------------------------- + # Entity relationships + # --------------------------------------------------------------- + + def _build_entity_relationship(self, sim) -> pd.DataFrame: + if self._entity_rel_cache is not None: + return self._entity_rel_cache + + self._entity_rel_cache = pd.DataFrame( + { + "person_id": sim.calculate( + "person_id", map_to="person" + ).values, + "household_id": sim.calculate( + "household_id", map_to="person" + ).values, + "tax_unit_id": sim.calculate( + "tax_unit_id", map_to="person" + ).values, + "spm_unit_id": sim.calculate( + "spm_unit_id", map_to="person" + ).values, + } + ) + return self._entity_rel_cache + + # --------------------------------------------------------------- + # Constraint evaluation + # --------------------------------------------------------------- + + def _evaluate_constraints_entity_aware( + self, + sim, + constraints: List[dict], + n_households: int, + ) -> np.ndarray: + """Evaluate constraints at person level, aggregate to + household level via .any().""" + if not constraints: + return np.ones(n_households, dtype=bool) + + entity_rel = self._build_entity_relationship(sim) + n_persons = len(entity_rel) + person_mask = np.ones(n_persons, dtype=bool) + + for c in constraints: + try: + vals = sim.calculate( + c["variable"], + self.time_period, + map_to="person", + ).values + except Exception as exc: + logger.warning( + "Cannot evaluate constraint '%s': %s", + c["variable"], + exc, + ) + return np.zeros(n_households, dtype=bool) + person_mask &= apply_op(vals, c["operation"], c["value"]) + + df = entity_rel.copy() + df["satisfies"] = person_mask + hh_mask = df.groupby("household_id")["satisfies"].any() + 
+ household_ids = sim.calculate( + "household_id", map_to="household" + ).values + return np.array([hh_mask.get(hid, False) for hid in household_ids]) + + # --------------------------------------------------------------- + # Database queries + # --------------------------------------------------------------- + + def _get_stratum_constraints(self, stratum_id: int) -> List[dict]: + query = """ + SELECT constraint_variable AS variable, operation, value + FROM stratum_constraints + WHERE stratum_id = :stratum_id + """ + with self.engine.connect() as conn: + df = pd.read_sql( + query, + conn, + params={"stratum_id": int(stratum_id)}, + ) + return df.to_dict("records") + + def _query_targets(self, target_filter: dict) -> pd.DataFrame: + """Query targets via target_overview view with + best-period selection.""" + or_conditions = [] + + if "domain_variables" in target_filter: + dvs = target_filter["domain_variables"] + ph = ",".join(f"'{dv}'" for dv in dvs) + or_conditions.append(f"tv.domain_variable IN ({ph})") + + if "variables" in target_filter: + vs = ",".join(f"'{v}'" for v in target_filter["variables"]) + or_conditions.append(f"tv.variable IN ({vs})") + + if "target_ids" in target_filter: + ids = ",".join(map(str, target_filter["target_ids"])) + or_conditions.append(f"tv.target_id IN ({ids})") + + if "stratum_ids" in target_filter: + ids = ",".join(map(str, target_filter["stratum_ids"])) + or_conditions.append(f"tv.stratum_id IN ({ids})") + + if not or_conditions: + where_clause = "1=1" + else: + where_clause = " OR ".join(f"({c})" for c in or_conditions) + + query = f""" + WITH filtered_targets AS ( + SELECT tv.target_id, tv.stratum_id, tv.variable, + tv.value, tv.period, tv.geo_level, + tv.geographic_id, tv.domain_variable + FROM target_overview tv + WHERE {where_clause} + ), + best_periods AS ( + SELECT stratum_id, variable, + CASE + WHEN MAX(CASE WHEN period <= :time_period + THEN period END) IS NOT NULL + THEN MAX(CASE WHEN period <= :time_period + THEN period 
END) + ELSE MIN(period) + END as best_period + FROM filtered_targets + GROUP BY stratum_id, variable + ) + SELECT ft.* + FROM filtered_targets ft + JOIN best_periods bp + ON ft.stratum_id = bp.stratum_id + AND ft.variable = bp.variable + AND ft.period = bp.best_period + ORDER BY ft.target_id + """ + + with self.engine.connect() as conn: + return pd.read_sql( + query, + conn, + params={"time_period": self.time_period}, + ) + + # --------------------------------------------------------------- + # Uprating + # --------------------------------------------------------------- + + def _calculate_uprating_factors(self, params) -> dict: + factors = {} + query = ( + "SELECT DISTINCT period FROM targets " + "WHERE period IS NOT NULL ORDER BY period" + ) + with self.engine.connect() as conn: + result = conn.execute(text(query)) + years_needed = [row[0] for row in result] + + for from_year in years_needed: + if from_year == self.time_period: + factors[(from_year, "cpi")] = 1.0 + factors[(from_year, "pop")] = 1.0 + continue + + try: + cpi_from = params.gov.bls.cpi.cpi_u(from_year) + cpi_to = params.gov.bls.cpi.cpi_u(self.time_period) + factors[(from_year, "cpi")] = float(cpi_to / cpi_from) + except Exception: + factors[(from_year, "cpi")] = 1.0 + + try: + pop_from = params.calibration.gov.census.populations.total( + from_year + ) + pop_to = params.calibration.gov.census.populations.total( + self.time_period + ) + factors[(from_year, "pop")] = float(pop_to / pop_from) + except Exception: + factors[(from_year, "pop")] = 1.0 + + return factors + + def _get_uprating_info( + self, + variable: str, + period: int, + factors: dict, + ) -> Tuple[float, str]: + if period == self.time_period: + return 1.0, "none" + + count_indicators = [ + "count", + "person", + "people", + "households", + "tax_units", + ] + is_count = any(ind in variable.lower() for ind in count_indicators) + uprating_type = "pop" if is_count else "cpi" + factor = factors.get((period, uprating_type), 1.0) + return factor, 
uprating_type + + def _load_aca_ptc_factors( + self, + ) -> Dict[int, Dict[str, float]]: + csv_path = STORAGE_FOLDER / "aca_ptc_multipliers_2022_2024.csv" + df = pd.read_csv(csv_path) + result = {} + for _, row in df.iterrows(): + fips_str = STATE_NAME_TO_FIPS.get(row["state"]) + if fips_str is None: + continue + fips_int = int(fips_str) + result[fips_int] = { + "tax_unit_count": row["vol_mult"], + "aca_ptc": row["vol_mult"] * row["val_mult"], + } + return result + + def _get_state_uprating_factors( + self, + domain: str, + targets_df: pd.DataFrame, + national_factors: dict, + ) -> Dict[int, Dict[str, float]]: + state_rows = targets_df[ + (targets_df["domain_variable"] == domain) + & (targets_df["geo_level"] == "state") + ] + state_fips_list = state_rows["geographic_id"].unique() + variables = state_rows["variable"].unique() + + if domain == "aca_ptc": + csv_factors = self._load_aca_ptc_factors() + else: + csv_factors = None + + result = {} + for sf in state_fips_list: + state_int = int(sf) + var_factors = {} + + if csv_factors and state_int in csv_factors: + for var in variables: + var_factors[var] = csv_factors[state_int].get(var, 1.0) + else: + for var in variables: + row = state_rows[ + (state_rows["geographic_id"] == sf) + & (state_rows["variable"] == var) + ] + if row.empty: + var_factors[var] = 1.0 + continue + period = row.iloc[0]["period"] + factor, _ = self._get_uprating_info( + var, period, national_factors + ) + var_factors[var] = factor + + result[state_int] = var_factors + + return result + + def _apply_hierarchical_uprating( + self, + targets_df: pd.DataFrame, + hierarchical_domains: List[str], + national_factors: dict, + ) -> pd.DataFrame: + """Apply state-level uprating and reconcile CDs. + + Two factors per CD row: + - hif: state_original / sum(cd_originals) + - uprating_factor: state-specific scaling + + Final CD value = original * hif * uprating_factor. 
+ """ + df = targets_df.copy() + df["hif"] = np.nan + df["state_uprating_factor"] = np.nan + rows_to_drop = [] + + for domain in hierarchical_domains: + domain_mask = df["domain_variable"] == domain + state_factors = self._get_state_uprating_factors( + domain, df, national_factors + ) + state_mask = domain_mask & (df["geo_level"] == "state") + district_mask = domain_mask & (df["geo_level"] == "district") + + for sf, var_factors in state_factors.items(): + for var, uf in var_factors.items(): + state_row = df[ + state_mask + & (df["geographic_id"] == str(sf)) + & (df["variable"] == var) + ] + if state_row.empty: + continue + state_original = state_row.iloc[0]["original_value"] + + def _cd_in_state(g, s=sf): + try: + return int(g) // 100 == s + except (ValueError, TypeError): + return False + + cd_mask = ( + district_mask + & (df["variable"] == var) + & df["geographic_id"].apply(_cd_in_state) + ) + cd_rows = df[cd_mask] + if cd_rows.empty: + continue + + cd_original_sum = cd_rows["original_value"].sum() + if cd_original_sum == 0: + continue + + hif = state_original / cd_original_sum + for cd_idx in cd_rows.index: + df.at[cd_idx, "hif"] = hif + df.at[cd_idx, "state_uprating_factor"] = uf + df.at[cd_idx, "value"] = ( + df.at[cd_idx, "original_value"] * hif * uf + ) + + # Drop national/state rows used for reconciliation + national_mask = domain_mask & (df["geo_level"] == "national") + for idx in df[national_mask | state_mask].index: + row = df.loc[idx] + if row["period"] != self.time_period: + rows_to_drop.append(idx) + + if rows_to_drop: + df = df.drop(index=rows_to_drop).reset_index(drop=True) + + df["target_period"] = self.time_period + return df + + def print_uprating_summary(self, targets_df: pd.DataFrame) -> None: + has_state_uf = "state_uprating_factor" in targets_df.columns + if has_state_uf: + eff = targets_df["state_uprating_factor"].fillna( + targets_df["uprating_factor"] + ) + else: + eff = targets_df["uprating_factor"] + + uprated = targets_df[eff != 1.0] + 
if len(uprated) == 0: + print("No targets were uprated.") + return + + print("\n" + "=" * 60) + print("UPRATING SUMMARY") + print("=" * 60) + print(f"Uprated {len(uprated)} of " f"{len(targets_df)} targets") + period_counts = uprated["period"].value_counts().sort_index() + for period, count in period_counts.items(): + print(f" Period {period}: {count} targets") + factors = eff[eff != 1.0] + print( + f" Factor range: [{factors.min():.4f}, " f"{factors.max():.4f}]" + ) + + # --------------------------------------------------------------- + # Target naming + # --------------------------------------------------------------- + + @staticmethod + def _make_target_name( + variable: str, + constraints: List[dict], + reform_id: int = 0, + ) -> str: + geo_parts: List[str] = [] + for c in constraints: + if c["variable"] == "state_fips": + geo_parts.append(f"state_{c['value']}") + elif c["variable"] == "congressional_district_geoid": + geo_parts.append(f"cd_{c['value']}") + + parts: List[str] = [] + parts.append("/".join(geo_parts) if geo_parts else "national") + if reform_id > 0: + parts.append(f"{variable}_expenditure") + else: + parts.append(variable) + + non_geo = [c for c in constraints if c["variable"] not in _GEO_VARS] + if non_geo: + strs = [ + f"{c['variable']}{c['operation']}{c['value']}" for c in non_geo + ] + parts.append("[" + ",".join(strs) + "]") + + return "/".join(parts) + + # --------------------------------------------------------------- + # Target value calculation + # --------------------------------------------------------------- + + def _calculate_target_values( + self, + sim, + target_variable: str, + non_geo_constraints: List[dict], + n_households: int, + ) -> np.ndarray: + """Calculate per-household target values. + + For count targets (*_count): count entities per HH + satisfying constraints. + For value targets: multiply values by constraint mask. 
+ """ + is_count = target_variable.endswith("_count") + + if not is_count: + mask = self._evaluate_constraints_entity_aware( + sim, non_geo_constraints, n_households + ) + vals = sim.calculate(target_variable, map_to="household").values + return (vals * mask).astype(np.float32) + + # Count target: entity-aware counting + entity_rel = self._build_entity_relationship(sim) + n_persons = len(entity_rel) + person_mask = np.ones(n_persons, dtype=bool) + + for c in non_geo_constraints: + try: + cv = sim.calculate(c["variable"], map_to="person").values + except Exception: + return np.zeros(n_households, dtype=np.float32) + person_mask &= apply_op(cv, c["operation"], c["value"]) + + target_entity = sim.tax_benefit_system.variables[ + target_variable + ].entity.key + household_ids = sim.calculate( + "household_id", map_to="household" + ).values + + if target_entity == "household": + if non_geo_constraints: + mask = self._evaluate_constraints_entity_aware( + sim, non_geo_constraints, n_households + ) + return mask.astype(np.float32) + return np.ones(n_households, dtype=np.float32) + + if target_entity == "person": + er = entity_rel.copy() + er["satisfies"] = person_mask + filtered = er[er["satisfies"]] + counts = filtered.groupby("household_id")["person_id"].nunique() + else: + eid_col = f"{target_entity}_id" + er = entity_rel.copy() + er["satisfies"] = person_mask + entity_ok = er.groupby(eid_col)["satisfies"].any() + unique = er[["household_id", eid_col]].drop_duplicates() + unique["entity_ok"] = unique[eid_col].map(entity_ok) + filtered = unique[unique["entity_ok"]] + counts = filtered.groupby("household_id")[eid_col].nunique() + + return np.array( + [counts.get(hid, 0) for hid in household_ids], + dtype=np.float32, + ) + + # --------------------------------------------------------------- + # Clone simulation + # --------------------------------------------------------------- + + def _simulate_clone( + self, + clone_state_fips: np.ndarray, + n_records: int, + variables: 
set, + sim_modifier=None, + clone_idx: int = 0, + ) -> Tuple[Dict[str, np.ndarray], object]: + """Simulate one clone with assigned geography. + + Args: + clone_state_fips: State FIPS per record, shape + (n_records,). + n_records: Number of base records. + variables: Target variable names to compute. + sim_modifier: Optional callback(sim, clone_idx) + called after state_fips is set but before + cache clearing. Used for takeup + re-randomization. + clone_idx: Clone index passed to sim_modifier. + + Returns: + (var_values, sim) where var_values maps variable + name to household-level float32 array. + """ + from policyengine_us import Microsimulation + + sim = Microsimulation(dataset=self.dataset_path) + sim.set_input( + "state_fips", + self.time_period, + clone_state_fips.astype(np.int32), + ) + if sim_modifier is not None: + sim_modifier(sim, clone_idx) + for var in get_calculated_variables(sim): + sim.delete_arrays(var) + + var_values: Dict[str, np.ndarray] = {} + for var in variables: + if var.endswith("_count"): + continue + try: + var_values[var] = sim.calculate( + var, + self.time_period, + map_to="household", + ).values.astype(np.float32) + except Exception as exc: + logger.warning("Cannot calculate '%s': %s", var, exc) + + return var_values, sim + + # --------------------------------------------------------------- + # Main build method + # --------------------------------------------------------------- + + def build_matrix( + self, + geography, + sim, + target_filter: Optional[dict] = None, + hierarchical_domains: Optional[List[str]] = None, + cache_dir: Optional[str] = None, + sim_modifier=None, + ) -> Tuple[pd.DataFrame, sparse.csr_matrix, List[str]]: + """Build sparse calibration matrix. + + Two-phase build: (1) simulate each clone and save + COO entries to disk, (2) assemble CSR from caches. + + Args: + geography: GeographyAssignment with state_fips, + cd_geoid, block_geoid arrays and n_records, + n_clones attributes. 
+ sim: Microsimulation for parameters and entity + relationships. + target_filter: Dict for target_overview filtering. + hierarchical_domains: Domain names for + hierarchical uprating + CD reconciliation. + cache_dir: Directory for per-clone COO caches. + If None, COO data held in memory. + sim_modifier: Optional callback(sim, clone_idx) + called per clone after state_fips is set but + before cache clearing. Use for takeup + re-randomization. + + Returns: + (targets_df, X_sparse, target_names) + """ + n_records = geography.n_records + n_clones = geography.n_clones + n_total = n_records * n_clones + self._coo_parts = ([], [], []) + + # 1. Query and uprate targets + targets_df = self._query_targets(target_filter or {}) + if len(targets_df) == 0: + raise ValueError("No targets found matching filter") + + params = sim.tax_benefit_system.parameters + uprating_factors = self._calculate_uprating_factors(params) + targets_df["original_value"] = targets_df["value"].copy() + targets_df["uprating_factor"] = targets_df.apply( + lambda row: self._get_uprating_info( + row["variable"], + row["period"], + uprating_factors, + )[0], + axis=1, + ) + targets_df["value"] = ( + targets_df["original_value"] * targets_df["uprating_factor"] + ) + + if hierarchical_domains: + targets_df = self._apply_hierarchical_uprating( + targets_df, + hierarchical_domains, + uprating_factors, + ) + + n_targets = len(targets_df) + + # 2. Sort targets by geographic level + targets_df["_geo_level"] = targets_df["geographic_id"].apply( + get_geo_level + ) + targets_df = targets_df.sort_values( + ["_geo_level", "variable", "geographic_id"] + ) + targets_df = targets_df.drop(columns=["_geo_level"]).reset_index( + drop=True + ) + + # 3. 
Build column index structures from geography + state_col_lists: Dict[int, list] = defaultdict(list) + cd_col_lists: Dict[str, list] = defaultdict(list) + for col in range(n_total): + state_col_lists[int(geography.state_fips[col])].append(col) + cd_col_lists[str(geography.cd_geoid[col])].append(col) + state_to_cols = {s: np.array(c) for s, c in state_col_lists.items()} + cd_to_cols = {cd: np.array(c) for cd, c in cd_col_lists.items()} + + # 4. Pre-process targets: resolve constraints + constraint_cache: Dict[int, List[dict]] = {} + target_geo_info: List[Tuple[str, str]] = [] + target_names: List[str] = [] + non_geo_constraints_list: List[List[dict]] = [] + + for _, row in targets_df.iterrows(): + sid = int(row["stratum_id"]) + if sid not in constraint_cache: + constraint_cache[sid] = self._get_stratum_constraints(sid) + constraints = constraint_cache[sid] + + geo_level = row["geo_level"] + geo_id = row["geographic_id"] + target_geo_info.append((geo_level, geo_id)) + + non_geo = [ + c for c in constraints if c["variable"] not in _GEO_VARS + ] + non_geo_constraints_list.append(non_geo) + + target_names.append( + self._make_target_name(str(row["variable"]), constraints) + ) + + unique_variables = set(targets_df["variable"].values) + + # 5. 
Clone loop + from pathlib import Path + + clone_dir = Path(cache_dir) if cache_dir else None + if clone_dir: + clone_dir.mkdir(parents=True, exist_ok=True) + + self._entity_rel_cache = None + + for clone_idx in range(n_clones): + if clone_dir: + coo_path = clone_dir / f"clone_{clone_idx:04d}.npz" + if coo_path.exists(): + logger.info( + "Clone %d/%d cached, skipping.", + clone_idx + 1, + n_clones, + ) + continue + + col_start = clone_idx * n_records + col_end = col_start + n_records + clone_states = geography.state_fips[col_start:col_end] + + logger.info( + "Processing clone %d/%d " "(cols %d-%d, %d unique states)...", + clone_idx + 1, + n_clones, + col_start, + col_end - 1, + len(np.unique(clone_states)), + ) + + var_values, clone_sim = self._simulate_clone( + clone_states, + n_records, + unique_variables, + sim_modifier=sim_modifier, + clone_idx=clone_idx, + ) + + mask_cache: Dict[tuple, np.ndarray] = {} + count_cache: Dict[tuple, np.ndarray] = {} + + rows_list: list = [] + cols_list: list = [] + vals_list: list = [] + + for row_idx in range(n_targets): + variable = str(targets_df.iloc[row_idx]["variable"]) + geo_level, geo_id = target_geo_info[row_idx] + non_geo = non_geo_constraints_list[row_idx] + + # Geographic column selection + if geo_level == "district": + all_geo_cols = cd_to_cols.get( + str(geo_id), + np.array([], dtype=np.int64), + ) + elif geo_level == "state": + all_geo_cols = state_to_cols.get( + int(geo_id), + np.array([], dtype=np.int64), + ) + else: + all_geo_cols = np.arange(n_total) + + clone_cols = all_geo_cols[ + (all_geo_cols >= col_start) & (all_geo_cols < col_end) + ] + if len(clone_cols) == 0: + continue + + rec_indices = clone_cols - col_start + + constraint_key = tuple( + sorted( + ( + c["variable"], + c["operation"], + c["value"], + ) + for c in non_geo + ) + ) + + if variable.endswith("_count"): + vkey = (variable, constraint_key) + if vkey not in count_cache: + count_cache[vkey] = self._calculate_target_values( + clone_sim, + 
variable, + non_geo, + n_records, + ) + values = count_cache[vkey] + else: + if variable not in var_values: + continue + if constraint_key not in mask_cache: + mask_cache[constraint_key] = ( + self._evaluate_constraints_entity_aware( + clone_sim, + non_geo, + n_records, + ) + ) + mask = mask_cache[constraint_key] + values = var_values[variable] * mask + + vals = values[rec_indices] + nonzero = vals != 0 + if nonzero.any(): + rows_list.append( + np.full( + nonzero.sum(), + row_idx, + dtype=np.int32, + ) + ) + cols_list.append(clone_cols[nonzero].astype(np.int32)) + vals_list.append(vals[nonzero]) + + # Save COO entries + if rows_list: + cr = np.concatenate(rows_list) + cc = np.concatenate(cols_list) + cv = np.concatenate(vals_list) + else: + cr = np.array([], dtype=np.int32) + cc = np.array([], dtype=np.int32) + cv = np.array([], dtype=np.float32) + + if clone_dir: + np.savez_compressed( + str(coo_path), + rows=cr, + cols=cc, + vals=cv, + ) + logger.info( + "Clone %d: %d nonzero entries saved.", + clone_idx + 1, + len(cv), + ) + del var_values, clone_sim + else: + self._coo_parts[0].append(cr) + self._coo_parts[1].append(cc) + self._coo_parts[2].append(cv) + + # 6. 
Assemble sparse matrix from COO data + logger.info("Assembling matrix from %d clones...", n_clones) + if clone_dir: + all_r, all_c, all_v = [], [], [] + for ci in range(n_clones): + p = clone_dir / f"clone_{ci:04d}.npz" + data = np.load(str(p)) + all_r.append(data["rows"]) + all_c.append(data["cols"]) + all_v.append(data["vals"]) + rows = np.concatenate(all_r) + cols = np.concatenate(all_c) + vals = np.concatenate(all_v) + else: + rows = np.concatenate(self._coo_parts[0]) + cols = np.concatenate(self._coo_parts[1]) + vals = np.concatenate(self._coo_parts[2]) + del self._coo_parts + + X_csr = sparse.csr_matrix( + (vals, (rows, cols)), + shape=(n_targets, n_total), + dtype=np.float32, + ) + + logger.info( + "Matrix: %d targets x %d cols, %d nnz", + X_csr.shape[0], + X_csr.shape[1], + X_csr.nnz, + ) + + return targets_df, X_csr, target_names diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py b/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py index 3dcab0e9f..97c82360d 100644 --- a/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py +++ b/policyengine_us_data/datasets/cps/local_area_calibration/calibration_utils.py @@ -277,7 +277,7 @@ def apply_op(values: np.ndarray, op: str, val: str) -> np.ndarray: return np.ones(len(values), dtype=bool) -def _get_geo_level(geo_id) -> int: +def get_geo_level(geo_id) -> int: """Return geographic level: 0=National, 1=State, 2=District.""" if geo_id == "US": return 0 @@ -324,9 +324,7 @@ def create_target_groups( # Add geo_level column for sorting targets_df = targets_df.copy() - targets_df["_geo_level"] = targets_df["geographic_id"].apply( - _get_geo_level - ) + targets_df["_geo_level"] = targets_df["geographic_id"].apply(get_geo_level) geo_level_names = {0: "National", 1: "State", 2: "District"} @@ -401,6 +399,70 @@ def create_target_groups( return target_groups, group_info +_GEO_LEVEL_NAMES = {0: "National", 1: "State", 2: "District"} + + 
+def drop_target_groups( + targets_df: pd.DataFrame, + X_sparse, + target_groups: np.ndarray, + group_info: List[str], + drop_specs: List[Tuple[str, str]], +) -> Tuple[pd.DataFrame, "sparse.csr_matrix"]: + """Drop target groups by (label_substring, geo_level_name). + + Args: + targets_df: Target metadata from build_matrix. + X_sparse: Sparse calibration matrix (n_targets x n_cols). + target_groups: Group ID per row from create_target_groups. + group_info: Group descriptions from create_target_groups. + drop_specs: List of (label_substring, geo_level_name) + tuples. geo_level_name is "National", "State", or + "District". label_substring is matched case-insensitive + against group descriptions. + + Returns: + (filtered_targets_df, filtered_X_sparse) + """ + geo_levels = targets_df["geographic_id"].apply(get_geo_level) + name_to_level = {v: k for k, v in _GEO_LEVEL_NAMES.items()} + drop_ids = set() + + for label_substr, geo_name in drop_specs: + level = name_to_level[geo_name] + matched = False + for gid, info in enumerate(group_info): + group_mask = target_groups == gid + group_geo = geo_levels[group_mask] + if not (group_geo == level).all(): + continue + if label_substr.lower() in info.lower(): + drop_ids.add(gid) + matched = True + if not matched: + print( + f" WARNING: no match for " f"({label_substr!r}, {geo_name!r})" + ) + + keep_mask = ~np.isin(target_groups, list(drop_ids)) + + print(f"Matrix before: {X_sparse.shape[0]} rows") + for gid in sorted(drop_ids): + n = (target_groups == gid).sum() + print(f" DROPPING {group_info[gid]} ({n} rows)") + print() + + kept_ids = sorted(set(range(len(group_info))) - drop_ids) + for gid in kept_ids: + n = (target_groups == gid).sum() + print(f" KEEPING {group_info[gid]} ({n} rows)") + + X_out = X_sparse[keep_mask, :] + targets_out = targets_df[keep_mask].reset_index(drop=True) + print(f"\nMatrix after: {X_out.shape[0]} rows") + return targets_out, X_out + + def get_all_cds_from_database(db_uri: str) -> List[str]: """ Get 
ordered list of all CD GEOIDs from database. diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py b/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py deleted file mode 100644 index 7185c7dc1..000000000 --- a/policyengine_us_data/datasets/cps/local_area_calibration/fit_calibration_weights.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -Fit calibration weights using L0-regularized optimization. -Prototype script for weight calibration using the l0-python package. -""" - -import argparse -import logging -from datetime import datetime -from pathlib import Path - -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) - -parser = argparse.ArgumentParser(description="Fit calibration weights") -parser.add_argument( - "--device", - default="cpu", - choices=["cpu", "cuda"], - help="Device for training (cpu or cuda)", -) -parser.add_argument( - "--epochs", type=int, default=100, help="Total epochs for training" -) -parser.add_argument( - "--db-path", - default=None, - help="Path to policy_data.db (default: STORAGE_FOLDER/calibration/policy_data.db)", -) -parser.add_argument( - "--dataset-path", default=None, help="Path to stratified CPS h5 file" -) -args = parser.parse_args() - -import numpy as np -import pandas as pd -from policyengine_us import Microsimulation -from policyengine_us_data.storage import STORAGE_FOLDER -from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import ( - SparseMatrixBuilder, -) -from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( - get_all_cds_from_database, -) -from policyengine_us_data.datasets.cps.local_area_calibration.matrix_tracer import ( - MatrixTracer, -) - -try: - import torch - from l0.calibration import SparseCalibrationWeights -except ImportError: - raise ImportError( - "l0-python is required for weight fitting. 
" - "Install with: pip install policyengine-us-data[l0]" - ) - -# ============================================================================ -# CONFIGURATION -# ============================================================================ -DEVICE = args.device -TOTAL_EPOCHS = args.epochs -EPOCHS_PER_CHUNK = 500 # TODO: need a better way to set this. Remember it can blow up the Vercel app - -# Groups to exclude from the matrix (by group ID from tracer output). -# Set to [] to keep all groups. Review tracer.print_matrix_structure() -# output to decide. E.g., drop state-level rows that are linearly -# redundant with reconciled district rows — or keep them to steer -# the optimizer. -GROUPS_TO_EXCLUDE = [1] # drop state SNAP HH counts (redundant with Group 4) - -# Hyperparameters -BETA = 0.35 -GAMMA = -0.1 -ZETA = 1.1 -INIT_KEEP_PROB = 0.999 -LOG_WEIGHT_JITTER_SD = 0.05 -LOG_ALPHA_JITTER_SD = 0.01 -LAMBDA_L0 = 1e-8 -LAMBDA_L2 = 1e-12 -LEARNING_RATE = 0.15 - -# Data paths -if args.db_path: - db_path = Path(args.db_path) -else: - db_path = STORAGE_FOLDER / "calibration" / "policy_data.db" -db_uri = f"sqlite:///{db_path}" - -if args.dataset_path: - dataset_path = Path(args.dataset_path) -else: - dataset_path = STORAGE_FOLDER / "stratified_extended_cps_2024.h5" - -output_dir = STORAGE_FOLDER / "calibration" -output_dir.mkdir(parents=True, exist_ok=True) -time_period = 2024 - -# Get all CDs from database -cds_to_calibrate = get_all_cds_from_database(db_uri) -print(f"Found {len(cds_to_calibrate)} congressional districts") - -# ============================================================================ -# STEP 1: BUILD CALIBRATION MATRIX -# ============================================================================ -print(f"Loading simulation from {dataset_path}...") -sim = Microsimulation(dataset=str(dataset_path)) -n_households = len(sim.calculate("household_id", map_to="household").values) -print(f"Loaded {n_households:,} households") - -print("\nBuilding sparse 
matrix...") -builder = SparseMatrixBuilder( - db_uri=db_uri, - time_period=time_period, - cds_to_calibrate=cds_to_calibrate, - dataset_path=str(dataset_path), -) - -targets_df, X_sparse, household_id_mapping = builder.build_matrix( - sim, - target_filter={ - "domain_variables": ["aca_ptc", "snap"], - }, - hierarchical_domains=["aca_ptc", "snap"], -) - -builder.print_uprating_summary(targets_df) - -tracer = MatrixTracer( - targets_df, X_sparse, household_id_mapping, cds_to_calibrate, sim -) -tracer.print_matrix_structure() - -print(f"\nMatrix shape: {X_sparse.shape}") -print(f"Targets: {len(targets_df)}") - -# ============================================================================ -# STEP 2: FILTER GROUPS AND ACHIEVABLE TARGETS -# ============================================================================ -if GROUPS_TO_EXCLUDE: - keep_mask = ~np.isin(tracer.target_groups, GROUPS_TO_EXCLUDE) - n_dropped = (~keep_mask).sum() - print("\n" + "=" * 60) - print("GROUP EXCLUSION") - print("=" * 60) - print( - f"Excluding groups {GROUPS_TO_EXCLUDE}: " - f"dropping {n_dropped} of {len(targets_df)} rows" - ) - targets_df = targets_df[keep_mask].reset_index(drop=True) - X_sparse = X_sparse[keep_mask, :] - print(f"Matrix after exclusion: {X_sparse.shape}") -else: - print("\nNo groups excluded (GROUPS_TO_EXCLUDE is empty)") - -# Filter to achievable targets (rows with non-zero data) -row_sums = np.array(X_sparse.sum(axis=1)).flatten() -achievable_mask = row_sums > 0 -n_achievable = achievable_mask.sum() -n_impossible = (~achievable_mask).sum() - -print(f"\nAchievable targets: {n_achievable}") -print(f"Impossible targets (filtered out): {n_impossible}") - -targets_df = targets_df[achievable_mask].reset_index(drop=True) -X_sparse = X_sparse[achievable_mask, :] - -print(f"Final matrix shape: {X_sparse.shape}") - -# Extract target vector and names -targets = targets_df["value"].values -target_names = [ - f"{row['geographic_id']}/{row['variable']}" - for _, row in 
targets_df.iterrows() -] - -# ============================================================================ -# STEP 3: INITIALIZE WEIGHTS -# ============================================================================ -initial_weights = np.ones(X_sparse.shape[1]) * 100 -print(f"\nInitial weights shape: {initial_weights.shape}") -print(f"Initial weights sum: {initial_weights.sum():,.0f}") - -# ============================================================================ -# STEP 4: CREATE MODEL -# ============================================================================ -print("\nCreating SparseCalibrationWeights model...") -model = SparseCalibrationWeights( - n_features=X_sparse.shape[1], - beta=BETA, - gamma=GAMMA, - zeta=ZETA, - init_keep_prob=INIT_KEEP_PROB, - init_weights=initial_weights, - log_weight_jitter_sd=LOG_WEIGHT_JITTER_SD, - log_alpha_jitter_sd=LOG_ALPHA_JITTER_SD, - device=DEVICE, -) - -# ============================================================================ -# STEP 5: TRAIN IN CHUNKS -# ============================================================================ -timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") -calibration_log = pd.DataFrame() - -for chunk_start in range(0, TOTAL_EPOCHS, EPOCHS_PER_CHUNK): - chunk_epochs = min(EPOCHS_PER_CHUNK, TOTAL_EPOCHS - chunk_start) - current_epoch = chunk_start + chunk_epochs - - print(f"\nTraining epochs {chunk_start + 1} to {current_epoch}...") - - model.fit( - M=X_sparse, - y=targets, - target_groups=None, - lambda_l0=LAMBDA_L0, - lambda_l2=LAMBDA_L2, - lr=LEARNING_RATE, - epochs=chunk_epochs, - loss_type="relative", - verbose=True, - verbose_freq=chunk_epochs, - ) - - with torch.no_grad(): - predictions = model.predict(X_sparse).cpu().numpy() - - chunk_df = pd.DataFrame( - { - "target_name": target_names, - "estimate": predictions, - "target": targets, - } - ) - chunk_df["epoch"] = current_epoch - chunk_df["error"] = chunk_df.estimate - chunk_df.target - chunk_df["rel_error"] = chunk_df.error / 
chunk_df.target - chunk_df["abs_error"] = chunk_df.error.abs() - chunk_df["rel_abs_error"] = chunk_df.rel_error.abs() - chunk_df["loss"] = chunk_df.rel_abs_error**2 - calibration_log = pd.concat([calibration_log, chunk_df], ignore_index=True) - -# ============================================================================ -# STEP 6: EXTRACT AND SAVE WEIGHTS -# ============================================================================ -with torch.no_grad(): - w = model.get_weights(deterministic=True).cpu().numpy() - -print(f"\nFinal weights shape: {w.shape}") -print(f"Final weights sum: {w.sum():,.0f}") -print(f"Non-zero weights: {(w > 0).sum():,}") - -output_path = output_dir / f"calibration_weights_{timestamp}.npy" -np.save(output_path, w) -print(f"\nWeights saved to: {output_path}") -print(f"OUTPUT_PATH:{output_path}") - -log_path = output_dir / f"calibration_log_{timestamp}.csv" -calibration_log.to_csv(log_path, index=False) -print(f"Calibration log saved to: {log_path}") -print(f"LOG_PATH:{log_path}") - -# ============================================================================ -# STEP 7: VERIFY PREDICTIONS -# ============================================================================ -print("\n" + "=" * 60) -print("PREDICTION VERIFICATION") -print("=" * 60) - -with torch.no_grad(): - predictions = model.predict(X_sparse).cpu().numpy() - -for i in range(len(targets)): - rel_error = (predictions[i] - targets[i]) / targets[i] * 100 - print( - f"{target_names[i][:50]:50} | " - f"pred: {predictions[i]:>12,.0f} | " - f"target: {targets[i]:>12,.0f} | " - f"err: {rel_error:>6.2f}%" - ) - -print("\n" + "=" * 60) -print("FITTING COMPLETED") -print("=" * 60) diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/matrix_tracer.py b/policyengine_us_data/datasets/cps/local_area_calibration/matrix_tracer.py deleted file mode 100644 index 4fbe6e78f..000000000 --- a/policyengine_us_data/datasets/cps/local_area_calibration/matrix_tracer.py +++ /dev/null 
@@ -1,382 +0,0 @@ -""" -Matrix tracer utility for debugging geo-stacking sparse matrices. - -This utility allows tracing through the complex stacked matrix structure -to verify values match simulation results. - -USAGE -===== - -Basic Setup: - - from matrix_tracer import MatrixTracer - - tracer = MatrixTracer( - targets_df, X_sparse, household_id_mapping, - cds_to_calibrate, sim - ) - -Common Operations: - - # 1. Understand what a column represents - col_info = tracer.get_column_info(100) - - # 2. Find where a household appears across all CDs - positions = tracer.get_household_column_positions(565) - - # 3. View matrix structure - tracer.print_matrix_structure() - -Matrix Structure: - - Columns are organized as: [CD1_households | CD2_households | ... | CD436_households] - Each CD block has n_households columns (e.g., 10,580 households) - - Formula to find column index: - column_idx = cd_block_number * n_households + household_index -""" - -import logging -import pandas as pd -import numpy as np -from typing import Dict, List -from scipy import sparse - -from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( - create_target_groups, -) - -logger = logging.getLogger(__name__) - - -class MatrixTracer: - """Trace through geo-stacked sparse matrices for debugging.""" - - def __init__( - self, - targets_df: pd.DataFrame, - matrix: sparse.csr_matrix, - household_id_mapping: Dict[str, List[str]], - geographic_ids: List[str], - sim, - ): - """ - Initialize tracer with matrix components. 
- - Args: - targets_df: DataFrame of all targets - matrix: The final stacked sparse matrix - household_id_mapping: Mapping from geo keys to household ID lists - geographic_ids: List of geographic IDs in order - sim: Microsimulation instance - """ - self.targets_df = targets_df - self.matrix = matrix - self.household_id_mapping = household_id_mapping - self.geographic_ids = geographic_ids - self.sim = sim - - # Get original household info - self.original_household_ids = sim.calculate("household_id").values - self.n_households = len(self.original_household_ids) - self.n_geographies = len(geographic_ids) - - # Build reverse lookup: original_hh_id -> index in original data - self.hh_id_to_index = { - hh_id: idx for idx, hh_id in enumerate(self.original_household_ids) - } - - # Build column catalog: maps column index -> (cd_geoid, household_id, household_index) - self.column_catalog = self._build_column_catalog() - - # Build row catalog: maps row index -> target info - self.row_catalog = self._build_row_catalog() - - logger.info( - f"Tracer initialized: {self.n_households} households x {self.n_geographies} geographies" - ) - logger.info(f"Matrix shape: {matrix.shape}") - - def _build_column_catalog(self) -> pd.DataFrame: - """Build a complete catalog of all matrix columns.""" - catalog = [] - col_idx = 0 - - for geo_id in self.geographic_ids: - for hh_idx, hh_id in enumerate(self.original_household_ids): - catalog.append( - { - "column_index": col_idx, - "cd_geoid": geo_id, - "household_id": hh_id, - "household_index": hh_idx, - } - ) - col_idx += 1 - - return pd.DataFrame(catalog) - - def _build_row_catalog(self) -> pd.DataFrame: - """Build a complete catalog of all matrix rows (targets).""" - catalog = [] - - for row_idx, (_, target) in enumerate(self.targets_df.iterrows()): - var_name = target["variable"] - var_desc = "" - if var_name in self.sim.tax_benefit_system.variables: - var_obj = self.sim.tax_benefit_system.variables[var_name] - var_desc = getattr(var_obj, 
"label", var_name) - - catalog.append( - { - "row_index": row_idx, - "variable": var_name, - "variable_desc": var_desc, - "geographic_id": target.get("geographic_id", "unknown"), - "target_value": target["value"], - "stratum_id": target.get("stratum_id"), - "domain_variable": target.get( - "domain_variable", "unknown" - ), - } - ) - - return pd.DataFrame(catalog) - - def get_column_info(self, col_idx: int) -> Dict: - """Get information about a specific column.""" - if col_idx >= len(self.column_catalog): - raise ValueError( - f"Column index {col_idx} out of range (max: {len(self.column_catalog)-1})" - ) - return self.column_catalog.iloc[col_idx].to_dict() - - def get_row_info(self, row_idx: int) -> Dict: - """Get information about a specific row (target).""" - if row_idx >= len(self.row_catalog): - raise ValueError( - f"Row index {row_idx} out of range (max: {len(self.row_catalog)-1})" - ) - return self.row_catalog.iloc[row_idx].to_dict() - - def lookup_matrix_cell(self, row_idx: int, col_idx: int) -> Dict: - """ - Look up a specific matrix cell and return complete context. - - Args: - row_idx: Row index in matrix - col_idx: Column index in matrix - - Returns: - Dict with row info, column info, and matrix value - """ - row_info = self.get_row_info(row_idx) - col_info = self.get_column_info(col_idx) - matrix_value = self.matrix[row_idx, col_idx] - - return { - "row_index": row_idx, - "column_index": col_idx, - "matrix_value": float(matrix_value), - "target": row_info, - "household": col_info, - } - - def get_household_column_positions( - self, original_hh_id: int - ) -> Dict[str, int]: - """ - Get all column positions for a household across all geographies. 
- - Args: - original_hh_id: Original household ID from simulation - - Returns: - Dict mapping geo_id to column position in stacked matrix - """ - if original_hh_id not in self.hh_id_to_index: - raise ValueError( - f"Household {original_hh_id} not found in original data" - ) - - # Get the household's index in the original data - hh_index = self.hh_id_to_index[original_hh_id] - - # Calculate column positions for each geography - positions = {} - for geo_idx, geo_id in enumerate(self.geographic_ids): - # Each geography gets a block of n_households columns - col_position = geo_idx * self.n_households + hh_index - positions[geo_id] = col_position - - return positions - - def print_matrix_structure(self, show_groups=True): - """Print a comprehensive breakdown of the matrix structure.""" - print("\n" + "=" * 80) - print("MATRIX STRUCTURE BREAKDOWN") - print("=" * 80) - - print( - f"\nMatrix dimensions: {self.matrix.shape[0]} rows x " - f"{self.matrix.shape[1]} columns" - ) - print(f" Rows = {len(self.row_catalog)} targets") - print( - f" Columns = {self.n_households} households x " - f"{self.n_geographies} CDs" - ) - print( - f" = {self.n_households:,} x {self.n_geographies} " - f"= {self.matrix.shape[1]:,}" - ) - - print("\n" + "-" * 80) - print("COLUMN STRUCTURE (Households stacked by CD)") - print("-" * 80) - - # Build column ranges by CD - col_ranges = [] - cumulative = 0 - for geo_id in self.geographic_ids: - start_col = cumulative - end_col = cumulative + self.n_households - 1 - col_ranges.append( - { - "cd_geoid": geo_id, - "start_col": start_col, - "end_col": end_col, - "n_households": self.n_households, - } - ) - cumulative += self.n_households - - ranges_df = pd.DataFrame(col_ranges) - print(f"\nShowing first and last 5 CDs of {len(ranges_df)} total:") - print("\nFirst 5 CDs:") - print(ranges_df.head(5).to_string(index=False)) - print("\nLast 5 CDs:") - print(ranges_df.tail(5).to_string(index=False)) - - print("\n" + "-" * 80) - print("ROW STRUCTURE (Targets)") 
- print("-" * 80) - - print(f"\nTotal targets: {len(self.row_catalog)}") - - # Summarize by geographic level if column exists - if "geographic_level" in self.row_catalog.columns: - print("\nTargets by geographic level:") - geo_level_summary = ( - self.row_catalog.groupby("geographic_level") - .size() - .reset_index(name="n_targets") - ) - print(geo_level_summary.to_string(index=False)) - - print("\nTargets by domain variable:") - domain_summary = ( - self.row_catalog.groupby("domain_variable") - .agg({"row_index": "count", "variable": lambda x: len(set(x))}) - .rename( - columns={"row_index": "n_targets", "variable": "n_unique_vars"} - ) - ) - print(domain_summary.to_string()) - - # Create and display target groups with row indices - if show_groups: - print("\n" + "-" * 80) - print("TARGET GROUPS (for loss calculation)") - print("-" * 80) - - target_groups, group_info = create_target_groups(self.targets_df) - - # Store for later use - self.target_groups = target_groups - - # Print each group with row indices - for group_id, info in enumerate(group_info): - group_mask = target_groups == group_id - row_indices = np.where(group_mask)[0] - - # Format row indices for display - if len(row_indices) > 6: - row_display = ( - f"[{row_indices[0]}, {row_indices[1]}, " - f"{row_indices[2]}, ..., {row_indices[-2]}, " - f"{row_indices[-1]}]" - ) - else: - row_display = str(row_indices.tolist()) - - print(f" {info} - rows {row_display}") - - print("\n" + "=" * 80) - - def print_column_catalog(self, max_rows: int = 50): - """Print a sample of the column catalog.""" - print( - f"\nColumn Catalog (showing first {max_rows} of {len(self.column_catalog)}):" - ) - print(self.column_catalog.head(max_rows).to_string(index=False)) - - def print_row_catalog(self, max_rows: int = 50): - """Print a sample of the row catalog.""" - print( - f"\nRow Catalog (showing first {max_rows} of {len(self.row_catalog)}):" - ) - print(self.row_catalog.head(max_rows).to_string(index=False)) - - def 
get_group_rows(self, group_id: int) -> pd.DataFrame: - """ - Get all rows belonging to a specific target group. - - Args: - group_id: The group ID to filter by - - Returns: - DataFrame of row catalog entries for this group - """ - if not hasattr(self, "target_groups"): - self.target_groups, self.group_info = create_target_groups( - self.targets_df - ) - - group_mask = self.target_groups == group_id - return self.row_catalog[group_mask].copy() - - def trace_household_targets(self, original_hh_id: int) -> pd.DataFrame: - """ - Extract all target values for a household across all geographies. - - Args: - original_hh_id: Original household ID to trace - - Returns: - DataFrame with target details and values for this household - """ - positions = self.get_household_column_positions(original_hh_id) - - results = [] - - for target_idx, (_, target) in enumerate(self.targets_df.iterrows()): - target_result = { - "target_idx": target_idx, - "variable": target["variable"], - "target_value": target["value"], - "geographic_id": target.get("geographic_id", "unknown"), - "domain_variable": target.get("domain_variable", "unknown"), - } - - # Extract values for this target across all geographies - for geo_id, col_pos in positions.items(): - if col_pos < self.matrix.shape[1]: - matrix_value = self.matrix[target_idx, col_pos] - target_result[f"matrix_value_{geo_id}"] = matrix_value - else: - target_result[f"matrix_value_{geo_id}"] = np.nan - - results.append(target_result) - - return pd.DataFrame(results) diff --git a/policyengine_us_data/datasets/cps/local_area_calibration/sparse_matrix_builder.py b/policyengine_us_data/datasets/cps/local_area_calibration/sparse_matrix_builder.py deleted file mode 100644 index 74b2e2cee..000000000 --- a/policyengine_us_data/datasets/cps/local_area_calibration/sparse_matrix_builder.py +++ /dev/null @@ -1,838 +0,0 @@ -""" -Sparse matrix builder for geo-stacking calibration. 
- -Generic, database-driven approach where all constraints (including geographic) -are evaluated as masks. Geographic constraints work because we SET state_fips -before evaluating constraints. -""" - -import logging -from collections import defaultdict -from typing import Dict, List, Optional, Tuple -import numpy as np -import pandas as pd -from scipy import sparse -from sqlalchemy import create_engine, text - -logger = logging.getLogger(__name__) - -from policyengine_us_data.storage import STORAGE_FOLDER -from policyengine_us_data.utils.census import STATE_NAME_TO_FIPS -from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( - get_calculated_variables, - apply_op, - _get_geo_level, -) - - -class SparseMatrixBuilder: - """Build sparse calibration matrices for geo-stacking.""" - - def __init__( - self, - db_uri: str, - time_period: int, - cds_to_calibrate: List[str], - dataset_path: Optional[str] = None, - ): - self.db_uri = db_uri - self.engine = create_engine(db_uri) - self.time_period = time_period - self.cds_to_calibrate = cds_to_calibrate - self.dataset_path = dataset_path - self._entity_rel_cache = None - - def _build_entity_relationship(self, sim) -> pd.DataFrame: - """ - Build entity relationship DataFrame mapping persons to all entity IDs. - - This is used to evaluate constraints at the person level and then - aggregate to household level, handling variables defined at different - entity levels (person, tax_unit, household, spm_unit). 
- - Returns: - DataFrame with person_id, household_id, tax_unit_id, spm_unit_id - """ - if self._entity_rel_cache is not None: - return self._entity_rel_cache - - self._entity_rel_cache = pd.DataFrame( - { - "person_id": sim.calculate( - "person_id", map_to="person" - ).values, - "household_id": sim.calculate( - "household_id", map_to="person" - ).values, - "tax_unit_id": sim.calculate( - "tax_unit_id", map_to="person" - ).values, - "spm_unit_id": sim.calculate( - "spm_unit_id", map_to="person" - ).values, - } - ) - return self._entity_rel_cache - - def _evaluate_constraints_entity_aware( - self, state_sim, constraints: List[dict], n_households: int - ) -> np.ndarray: - """ - Evaluate non-geographic constraints at person level, aggregate to - household level using .any(). - - This properly handles constraints on variables defined at different - entity levels (e.g., tax_unit_is_filer at tax_unit level). Instead of - summing values at household level (which would give 2, 3, etc. for - households with multiple tax units), we evaluate at person level and - use .any() aggregation ("does this household have at least one person - satisfying all constraints?"). 
- - Args: - state_sim: Microsimulation with state_fips set - constraints: List of constraint dicts with variable, operation, - value keys (geographic constraints should be pre-filtered) - n_households: Number of households - - Returns: - Boolean mask array of length n_households - """ - if not constraints: - return np.ones(n_households, dtype=bool) - - entity_rel = self._build_entity_relationship(state_sim) - n_persons = len(entity_rel) - - person_mask = np.ones(n_persons, dtype=bool) - - for c in constraints: - var = c["variable"] - op = c["operation"] - val = c["value"] - - # Calculate constraint variable at person level - constraint_values = state_sim.calculate( - var, self.time_period, map_to="person" - ).values - - # Apply operation at person level - person_mask &= apply_op(constraint_values, op, val) - - # Aggregate to household level using .any() - # "At least one person in this household satisfies ALL constraints" - entity_rel_with_mask = entity_rel.copy() - entity_rel_with_mask["satisfies"] = person_mask - - household_mask_series = entity_rel_with_mask.groupby("household_id")[ - "satisfies" - ].any() - - # Ensure we return a mask aligned with household order - household_ids = state_sim.calculate( - "household_id", map_to="household" - ).values - household_mask = np.array( - [ - household_mask_series.get(hh_id, False) - for hh_id in household_ids - ] - ) - - return household_mask - - def _calculate_target_values_entity_aware( - self, - state_sim, - target_variable: str, - non_geo_constraints: List[dict], - geo_mask: np.ndarray, - n_households: int, - ) -> np.ndarray: - """ - Calculate target values at household level, handling count targets. 
- - For count targets (*_count): Count entities per household satisfying - constraints - For value targets: Sum values at household level (existing behavior) - - Args: - state_sim: Microsimulation with state_fips set - target_variable: The target variable name (e.g., "snap", - "person_count") - non_geo_constraints: List of constraint dicts (geographic - constraints should be pre-filtered) - geo_mask: Boolean mask array for geographic filtering (household - level) - n_households: Number of households - - Returns: - Float array of target values at household level - """ - is_count_target = target_variable.endswith("_count") - - if not is_count_target: - # Value target: use existing entity-aware constraint evaluation - entity_mask = self._evaluate_constraints_entity_aware( - state_sim, non_geo_constraints, n_households - ) - mask = geo_mask & entity_mask - - target_values = state_sim.calculate( - target_variable, map_to="household" - ).values - return (target_values * mask).astype(np.float32) - - # Count target: need to count entities satisfying constraints - entity_rel = self._build_entity_relationship(state_sim) - n_persons = len(entity_rel) - - # Evaluate constraints at person level (don't aggregate to HH yet) - person_mask = np.ones(n_persons, dtype=bool) - for c in non_geo_constraints: - constraint_values = state_sim.calculate( - c["variable"], map_to="person" - ).values - person_mask &= apply_op( - constraint_values, c["operation"], c["value"] - ) - - # Get target entity from variable definition - target_entity = state_sim.tax_benefit_system.variables[ - target_variable - ].entity.key - - household_ids = state_sim.calculate( - "household_id", map_to="household" - ).values - geo_mask_map = dict(zip(household_ids, geo_mask)) - - if target_entity == "household": - # household_count: 1 per qualifying household - if non_geo_constraints: - entity_mask = self._evaluate_constraints_entity_aware( - state_sim, non_geo_constraints, n_households - ) - return (geo_mask & 
entity_mask).astype(np.float32) - return geo_mask.astype(np.float32) - - if target_entity == "person": - # Count persons satisfying constraints per household - entity_rel["satisfies"] = person_mask - entity_rel["geo_ok"] = entity_rel["household_id"].map(geo_mask_map) - filtered = entity_rel[ - entity_rel["satisfies"] & entity_rel["geo_ok"] - ] - counts = filtered.groupby("household_id")["person_id"].nunique() - else: - # For tax_unit, spm_unit: aggregate person mask to entity, then - # count - entity_id_col = f"{target_entity}_id" - entity_rel["satisfies"] = person_mask - entity_satisfies = entity_rel.groupby(entity_id_col)[ - "satisfies" - ].any() - - entity_rel_unique = entity_rel[ - ["household_id", entity_id_col] - ].drop_duplicates() - entity_rel_unique["entity_ok"] = entity_rel_unique[ - entity_id_col - ].map(entity_satisfies) - entity_rel_unique["geo_ok"] = entity_rel_unique[ - "household_id" - ].map(geo_mask_map) - filtered = entity_rel_unique[ - entity_rel_unique["entity_ok"] & entity_rel_unique["geo_ok"] - ] - counts = filtered.groupby("household_id")[entity_id_col].nunique() - - # Build result aligned with household order - return np.array( - [counts.get(hh_id, 0) for hh_id in household_ids], dtype=np.float32 - ) - - def _query_targets(self, target_filter: dict) -> pd.DataFrame: - """Query targets via target_overview view. - - Best period: most recent period <= self.time_period, or closest - future period if none exists. - - Returns DataFrame with geo_level, geographic_id, and - domain_variable columns. - - Supports filters: domain_variables, variables, target_ids, - stratum_ids. 
- """ - or_conditions = [] - - if "domain_variables" in target_filter: - dvs = target_filter["domain_variables"] - placeholders = ",".join(f"'{dv}'" for dv in dvs) - or_conditions.append(f"tv.domain_variable IN ({placeholders})") - - if "variables" in target_filter: - vars_str = ",".join(f"'{v}'" for v in target_filter["variables"]) - or_conditions.append(f"tv.variable IN ({vars_str})") - - if "target_ids" in target_filter: - ids = ",".join(map(str, target_filter["target_ids"])) - or_conditions.append(f"tv.target_id IN ({ids})") - - if "stratum_ids" in target_filter: - ids = ",".join(map(str, target_filter["stratum_ids"])) - or_conditions.append(f"tv.stratum_id IN ({ids})") - - if not or_conditions: - where_clause = "1=1" - else: - where_clause = " OR ".join(f"({c})" for c in or_conditions) - - query = f""" - WITH filtered_targets AS ( - SELECT tv.target_id, tv.stratum_id, tv.variable, tv.value, - tv.period, tv.geo_level, tv.geographic_id, - tv.domain_variable - FROM target_overview tv - WHERE {where_clause} - ), - best_periods AS ( - SELECT stratum_id, variable, - CASE - WHEN MAX(CASE WHEN period <= :time_period - THEN period END) IS NOT NULL - THEN MAX(CASE WHEN period <= :time_period - THEN period END) - ELSE MIN(period) - END as best_period - FROM filtered_targets - GROUP BY stratum_id, variable - ) - SELECT ft.* - FROM filtered_targets ft - JOIN best_periods bp - ON ft.stratum_id = bp.stratum_id - AND ft.variable = bp.variable - AND ft.period = bp.best_period - ORDER BY ft.target_id - """ - - with self.engine.connect() as conn: - return pd.read_sql( - query, conn, params={"time_period": self.time_period} - ) - - def _get_constraints(self, stratum_id: int) -> List[dict]: - """Get all constraints for a stratum (including geographic).""" - query = """ - SELECT constraint_variable as variable, operation, value - FROM stratum_constraints - WHERE stratum_id = :stratum_id - """ - with self.engine.connect() as conn: - df = pd.read_sql(query, conn, 
params={"stratum_id": stratum_id}) - return df.to_dict("records") - - def _get_geographic_id(self, stratum_id: int) -> str: - """Extract geographic_id from constraints for targets_df.""" - constraints = self._get_constraints(stratum_id) - for c in constraints: - if c["variable"] == "state_fips": - return c["value"] - if c["variable"] == "congressional_district_geoid": - return c["value"] - return "US" - - def _calculate_uprating_factors(self, params) -> dict: - """Calculate CPI and population uprating factors for all periods.""" - factors = {} - - query = "SELECT DISTINCT period FROM targets WHERE period IS NOT NULL ORDER BY period" - with self.engine.connect() as conn: - result = conn.execute(text(query)) - years_needed = [row[0] for row in result] - - logger.info( - f"Calculating uprating factors for years " - f"{years_needed} to {self.time_period}" - ) - - for from_year in years_needed: - if from_year == self.time_period: - factors[(from_year, "cpi")] = 1.0 - factors[(from_year, "pop")] = 1.0 - continue - - try: - cpi_from = params.gov.bls.cpi.cpi_u(from_year) - cpi_to = params.gov.bls.cpi.cpi_u(self.time_period) - factors[(from_year, "cpi")] = float(cpi_to / cpi_from) - except Exception as e: - logger.warning( - f"Could not calculate CPI factor for " f"{from_year}: {e}" - ) - factors[(from_year, "cpi")] = 1.0 - - try: - pop_from = params.calibration.gov.census.populations.total( - from_year - ) - pop_to = params.calibration.gov.census.populations.total( - self.time_period - ) - factors[(from_year, "pop")] = float(pop_to / pop_from) - except Exception as e: - logger.warning( - f"Could not calculate population factor for " - f"{from_year}: {e}" - ) - factors[(from_year, "pop")] = 1.0 - - for (year, type_), factor in sorted(factors.items()): - if factor != 1.0: - logger.info( - f" {year} -> {self.time_period} " - f"({type_}): {factor:.4f}" - ) - - return factors - - def _get_uprating_info( - self, - variable: str, - period: int, - factors: dict, - ) -> 
Tuple[float, str]: - """Get uprating factor and type for a variable at a given period.""" - if period == self.time_period: - return 1.0, "none" - - count_indicators = [ - "count", - "person", - "people", - "households", - "tax_units", - ] - is_count = any(ind in variable.lower() for ind in count_indicators) - uprating_type = "pop" if is_count else "cpi" - - factor = factors.get((period, uprating_type), 1.0) - return factor, uprating_type - - def _load_aca_ptc_factors(self) -> Dict[int, Dict[str, float]]: - """Load state-level ACA PTC uprating factors from CSV. - - Returns: - {state_fips_int: {"tax_unit_count": vol_mult, - "aca_ptc": vol_mult * val_mult}} - """ - csv_path = STORAGE_FOLDER / "aca_ptc_multipliers_2022_2024.csv" - df = pd.read_csv(csv_path) - result = {} - for _, row in df.iterrows(): - fips_str = STATE_NAME_TO_FIPS.get(row["state"]) - if fips_str is None: - continue - fips_int = int(fips_str) - result[fips_int] = { - "tax_unit_count": row["vol_mult"], - "aca_ptc": row["vol_mult"] * row["val_mult"], - } - return result - - def _get_state_uprating_factors( - self, - domain: str, - targets_df: pd.DataFrame, - national_factors: dict, - ) -> Dict[int, Dict[str, float]]: - """Get per-state uprating factors for a hierarchical domain. - - For aca_ptc: loads real state-level enrollment/APTC factors - from CSV. For other domains: returns uniform national CPI/pop - factors. - - Returns: - {state_fips: {variable: factor}} for each state in the - domain's state-level targets. 
- """ - state_rows = targets_df[ - (targets_df["domain_variable"] == domain) - & (targets_df["geo_level"] == "state") - ] - state_fips_list = state_rows["geographic_id"].unique() - variables = state_rows["variable"].unique() - - if domain == "aca_ptc": - csv_factors = self._load_aca_ptc_factors() - logger.info( - f" [{domain}] Using CSV state-level factors " - f"({len(csv_factors)} states)" - ) - else: - csv_factors = None - logger.info(f" [{domain}] Using national CPI/pop factors") - - result = {} - n_csv = 0 - n_fallback = 0 - for sf in state_fips_list: - state_int = int(sf) - var_factors = {} - - if csv_factors and state_int in csv_factors: - n_csv += 1 - for var in variables: - var_factors[var] = csv_factors[state_int].get(var, 1.0) - else: - n_fallback += 1 - for var in variables: - row = state_rows[ - (state_rows["geographic_id"] == sf) - & (state_rows["variable"] == var) - ] - if row.empty: - var_factors[var] = 1.0 - continue - period = row.iloc[0]["period"] - factor, _ = self._get_uprating_info( - var, period, national_factors - ) - var_factors[var] = factor - - result[state_int] = var_factors - - if csv_factors: - all_factors = [f for vf in result.values() for f in vf.values()] - logger.info( - f" {n_csv} states from CSV, " - f"{n_fallback} national fallback" - ) - for var in variables: - vf = [result[s][var] for s in result] - logger.info(f" {var}: [{min(vf):.4f}, {max(vf):.4f}]") - - return result - - def _apply_hierarchical_uprating( - self, - targets_df: pd.DataFrame, - hierarchical_domains: List[str], - national_factors: dict, - ) -> pd.DataFrame: - """Apply state-level uprating and reconcile CDs to state totals. - - Two separable factors per CD row: - - hif (hierarchy inconsistency factor): base-year correction - so that sum(CDs) == state total in the source data. - hif = state_original / sum(cd_originals). Pure geometry, - no time dimension. - - uprating_factor: state-specific (or national fallback) - scaling from base year to target year. 
Pure time, no - geography correction. - - Final CD value = original_value * hif * uprating_factor. - - Also drops national/state rows used for reconciliation - (keeps rows like CMS person_count at period == time_period). - """ - df = targets_df.copy() - df["hif"] = np.nan - df["state_uprating_factor"] = np.nan - - rows_to_drop = [] - - for domain in hierarchical_domains: - domain_mask = df["domain_variable"] == domain - - state_factors = self._get_state_uprating_factors( - domain, df, national_factors - ) - - state_mask = domain_mask & (df["geo_level"] == "state") - district_mask = domain_mask & (df["geo_level"] == "district") - - for sf, var_factors in state_factors.items(): - for var, uf in var_factors.items(): - state_row = df[ - state_mask - & (df["geographic_id"] == str(sf)) - & (df["variable"] == var) - ] - if state_row.empty: - continue - state_original = state_row.iloc[0]["original_value"] - - def _cd_in_state(g, s=sf): - try: - return int(g) // 100 == s - except (ValueError, TypeError): - return False - - cd_mask = ( - district_mask - & (df["variable"] == var) - & df["geographic_id"].apply(_cd_in_state) - ) - cd_rows = df[cd_mask] - if cd_rows.empty: - continue - - cd_original_sum = cd_rows["original_value"].sum() - if cd_original_sum == 0: - continue - - hif = state_original / cd_original_sum - - for cd_idx in cd_rows.index: - df.at[cd_idx, "hif"] = hif - df.at[cd_idx, "state_uprating_factor"] = uf - df.at[cd_idx, "value"] = ( - df.at[cd_idx, "original_value"] * hif * uf - ) - - # Log HIF and UF summary for this domain - cd_domain = df[district_mask & df["hif"].notna()] - if not cd_domain.empty: - for var in cd_domain["variable"].unique(): - vrows = cd_domain[cd_domain["variable"] == var] - hifs = vrows["hif"] - ufs = vrows["state_uprating_factor"] - logger.info( - f" [{domain}] {var}: " - f"{len(vrows)} CDs, " - f"HIF=[{hifs.min():.4f}, {hifs.max():.4f}], " - f"UF=[{ufs.min():.4f}, {ufs.max():.4f}]" - ) - - # Drop national/state rows used for 
reconciliation - # Keep rows like CMS person_count (period == time_period) - national_mask = domain_mask & (df["geo_level"] == "national") - for idx in df[national_mask | state_mask].index: - row = df.loc[idx] - if row["period"] != self.time_period: - rows_to_drop.append(idx) - - if rows_to_drop: - dropped = df.loc[rows_to_drop] - logger.info( - f"Hierarchical uprating: dropping " - f"{len(rows_to_drop)} national/state rows " - f"(used only for reconciliation)" - ) - for domain in hierarchical_domains: - d = dropped[dropped["domain_variable"] == domain] - if d.empty: - continue - by_level = d["geo_level"].value_counts().to_dict() - parts = [f"{n} {lvl}" for lvl, n in sorted(by_level.items())] - logger.info(f" {domain}: {', '.join(parts)}") - df = df.drop(index=rows_to_drop).reset_index(drop=True) - - df["target_period"] = self.time_period - - return df - - def print_uprating_summary(self, targets_df: pd.DataFrame) -> None: - """Print summary of uprating applied to targets.""" - has_state_uf = "state_uprating_factor" in targets_df.columns - - # Effective factor: use state_uprating_factor where set, - # otherwise fall back to uprating_factor - if has_state_uf: - eff = targets_df["state_uprating_factor"].fillna( - targets_df["uprating_factor"] - ) - else: - eff = targets_df["uprating_factor"] - - uprated = targets_df[eff != 1.0] - if len(uprated) == 0: - print("No targets were uprated.") - return - - print("\n" + "=" * 60) - print("UPRATING SUMMARY") - print("=" * 60) - print(f"Uprated {len(uprated)} of {len(targets_df)} targets") - - period_counts = uprated["period"].value_counts().sort_index() - for period, count in period_counts.items(): - print(f" Period {period}: {count} targets") - - factors = eff[eff != 1.0] - print( - f" Factor range: [{factors.min():.4f}, " f"{factors.max():.4f}]" - ) - - def _create_state_sim(self, state: int, n_households: int): - """Create a fresh simulation with state_fips set to given state.""" - from policyengine_us import 
Microsimulation - - state_sim = Microsimulation(dataset=self.dataset_path) - state_sim.set_input( - "state_fips", - self.time_period, - np.full(n_households, state, dtype=np.int32), - ) - for var in get_calculated_variables(state_sim): - state_sim.delete_arrays(var) - return state_sim - - def build_matrix( - self, - sim, - target_filter: dict, - hierarchical_domains: Optional[List[str]] = None, - ) -> Tuple[pd.DataFrame, sparse.csr_matrix, Dict[str, List[str]]]: - """ - Build sparse calibration matrix. - - Args: - sim: Microsimulation instance (used for household_ids, or - as template) - target_filter: Dict specifying which targets to include - - {"domain_variables": ["aca_ptc"]} via target_overview - - {"target_ids": [123, 456]} for specific targets - - an empty dict {} will fetch all targets - hierarchical_domains: Optional list of domain_variable - names for state-level uprating + CD reconciliation. - Requires domain_variables in target_filter. - - Returns: - Tuple of (targets_df, X_sparse, household_id_mapping) - """ - household_ids = sim.calculate( - "household_id", map_to="household" - ).values - n_households = len(household_ids) - n_cds = len(self.cds_to_calibrate) - n_cols = n_households * n_cds - - targets_df = self._query_targets(target_filter) - - if len(targets_df) == 0: - raise ValueError("No targets found matching filter") - - # Uprate targets from their original period to self.time_period - params = sim.tax_benefit_system.parameters - uprating_factors = self._calculate_uprating_factors(params) - targets_df["original_value"] = targets_df["value"].copy() - targets_df["uprating_factor"] = targets_df.apply( - lambda row: self._get_uprating_info( - row["variable"], row["period"], uprating_factors - )[0], - axis=1, - ) - targets_df["value"] = ( - targets_df["original_value"] * targets_df["uprating_factor"] - ) - - # Hierarchical uprating: state-level uprating + CD reconciliation - if hierarchical_domains: - targets_df = self._apply_hierarchical_uprating( - 
targets_df, hierarchical_domains, uprating_factors - ) - - n_targets = len(targets_df) - - # Sort by (geo_level, variable, geographic_id) for contiguous group - targets_df["_geo_level"] = targets_df["geographic_id"].apply( - _get_geo_level - ) - targets_df = targets_df.sort_values( - ["_geo_level", "variable", "geographic_id"] - ) - targets_df = targets_df.drop(columns=["_geo_level"]).reset_index( - drop=True - ) - - X = sparse.lil_matrix((n_targets, n_cols), dtype=np.float32) - - # Group CDs by state. CD GEOIDs follow format SSCCC where SS is state - # FIPS (2 digits) and CCC is CD number (2-3 digits), so state = CD // 100 - cds_by_state = defaultdict(list) - for cd_idx, cd in enumerate(self.cds_to_calibrate): - state = int(cd) // 100 - cds_by_state[state].append((cd_idx, cd)) - - for state, cd_list in cds_by_state.items(): - # Clear entity relationship cache when creating new simulation - self._entity_rel_cache = None - - if self.dataset_path: - state_sim = self._create_state_sim(state, n_households) - else: - state_sim = sim - state_sim.set_input( - "state_fips", - self.time_period, - np.full(n_households, state, dtype=np.int32), - ) - for var in get_calculated_variables(state_sim): - state_sim.delete_arrays(var) - - for cd_idx, cd in cd_list: - col_start = cd_idx * n_households - - for row_idx, (_, target) in enumerate(targets_df.iterrows()): - constraints = self._get_constraints(target["stratum_id"]) - - geo_constraints = [] - non_geo_constraints = [] - for c in constraints: - if c["variable"] in ( - "state_fips", - "congressional_district_geoid", - ): - geo_constraints.append(c) - else: - non_geo_constraints.append(c) - - # Check geographic constraints first (quick fail) - geo_mask = np.ones(n_households, dtype=bool) - for c in geo_constraints: - if c["variable"] == "congressional_district_geoid": - if ( - c["operation"] in ("==", "=") - and c["value"] != cd - ): - geo_mask[:] = False - elif c["variable"] == "state_fips": - if ( - c["operation"] in ("==", 
"=") - and int(c["value"]) != state - ): - geo_mask[:] = False - - if not geo_mask.any(): - continue - - # Calculate target values with entity-aware handling - # This properly handles count targets (*_count) by counting - # entities rather than summing values - masked_values = self._calculate_target_values_entity_aware( - state_sim, - target["variable"], - non_geo_constraints, - geo_mask, - n_households, - ) - - if not masked_values.any(): - continue - - nonzero = np.where(masked_values != 0)[0] - if len(nonzero) > 0: - X[row_idx, col_start + nonzero] = masked_values[ - nonzero - ] - - household_id_mapping = {} - for cd in self.cds_to_calibrate: - key = f"cd{cd}" - household_id_mapping[key] = [ - f"{hh_id}_{key}" for hh_id in household_ids - ] - - return targets_df, X.tocsr(), household_id_mapping diff --git a/policyengine_us_data/parameters/take_up/voluntary_filing.yaml b/policyengine_us_data/parameters/take_up/voluntary_filing.yaml new file mode 100644 index 000000000..46d23e504 --- /dev/null +++ b/policyengine_us_data/parameters/take_up/voluntary_filing.yaml @@ -0,0 +1,6 @@ +description: Percentage of tax units (not taking up EITC) who file taxes voluntarily. +metadata: + label: Voluntary filing rate + unit: /1 +values: + 2018-01-01: 0.05 diff --git a/policyengine_us_data/tests/test_calibration/__init__.py b/policyengine_us_data/tests/test_calibration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py b/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py new file mode 100644 index 000000000..8db56ddcb --- /dev/null +++ b/policyengine_us_data/tests/test_calibration/test_build_matrix_masking.py @@ -0,0 +1,207 @@ +"""Integration test for build_matrix geographic masking. 
+ +Traces one household through the matrix with 2 clones, verifying: +- National targets: both clones can contribute (non-zero) +- State targets: only the clone assigned to that state contributes +- CD targets: only the clone assigned to that CD contributes; + a different CD in the same state gets zero +""" + +import os + +import numpy as np +import pytest +from scipy import sparse + +from policyengine_us_data.storage import STORAGE_FOLDER + +DATASET_PATH = str(STORAGE_FOLDER / "stratified_extended_cps_2024.h5") +DB_PATH = str(STORAGE_FOLDER / "calibration" / "policy_data.db") +DB_URI = f"sqlite:///{DB_PATH}" + +N_CLONES = 2 +SEED = 42 +RECORD_IDX = 8629 # High SNAP ($18k), lands in TX/PA with seed=42 + + +def _data_available(): + return os.path.exists(DATASET_PATH) and os.path.exists(DB_PATH) + + +@pytest.fixture(scope="module") +def matrix_result(): + if not _data_available(): + pytest.skip("Calibration data not available") + + from policyengine_us import Microsimulation + from policyengine_us_data.calibration.clone_and_assign import ( + assign_random_geography, + ) + from policyengine_us_data.calibration.unified_matrix_builder import ( + UnifiedMatrixBuilder, + ) + + sim = Microsimulation(dataset=DATASET_PATH) + n_records = sim.calculate("household_id").values.shape[0] + geography = assign_random_geography( + n_records, n_clones=N_CLONES, seed=SEED + ) + builder = UnifiedMatrixBuilder( + db_uri=DB_URI, + time_period=2024, + dataset_path=DATASET_PATH, + ) + targets_df, X_sparse, target_names = builder.build_matrix( + geography=geography, + sim=sim, + target_filter={"domain_variables": ["snap", "medicaid"]}, + ) + return { + "geography": geography, + "targets_df": targets_df, + "X": X_sparse, + "target_names": target_names, + "n_records": n_records, + } + + +def _clone_col(n_records, clone_idx, record_idx): + return clone_idx * n_records + record_idx + + +class TestMatrixShape: + def test_columns_equal_clones_times_records(self, matrix_result): + X = 
matrix_result["X"] + n_records = matrix_result["n_records"] + assert X.shape[1] == N_CLONES * n_records + + def test_rows_equal_targets(self, matrix_result): + X = matrix_result["X"] + assert X.shape[0] == len(matrix_result["targets_df"]) + + def test_matrix_is_sparse(self, matrix_result): + X = matrix_result["X"] + density = X.nnz / (X.shape[0] * X.shape[1]) + assert density < 0.1 + + +class TestNationalMasking: + def test_both_clones_visible_to_national_target(self, matrix_result): + X = matrix_result["X"] + targets_df = matrix_result["targets_df"] + n_records = matrix_result["n_records"] + + national_rows = targets_df[targets_df["geo_level"] == "national"].index + assert len(national_rows) > 0 + + col_0 = _clone_col(n_records, 0, RECORD_IDX) + col_1 = _clone_col(n_records, 1, RECORD_IDX) + X_csc = X.tocsc() + + visible_0 = X_csc[:, col_0].toarray().ravel() + visible_1 = X_csc[:, col_1].toarray().ravel() + + for row_idx in national_rows: + if visible_0[row_idx] != 0 or visible_1[row_idx] != 0: + return + pytest.fail( + "Household has zero value for all national targets " + "in both clones — cannot verify masking" + ) + + +class TestStateMasking: + def test_clone_visible_only_to_own_state(self, matrix_result): + X = matrix_result["X"] + targets_df = matrix_result["targets_df"] + geography = matrix_result["geography"] + n_records = matrix_result["n_records"] + + col_0 = _clone_col(n_records, 0, RECORD_IDX) + col_1 = _clone_col(n_records, 1, RECORD_IDX) + state_0 = str(int(geography.state_fips[col_0])) + state_1 = str(int(geography.state_fips[col_1])) + + if state_0 == state_1: + pytest.skip( + "Both clones landed in the same state — " + "cannot test cross-state masking" + ) + + state_targets = targets_df[targets_df["geo_level"] == "state"] + X_csc = X.tocsc() + vals_0 = X_csc[:, col_0].toarray().ravel() + vals_1 = X_csc[:, col_1].toarray().ravel() + + for _, row in state_targets.iterrows(): + row_idx = row.name + geo_id = str(row["geographic_id"]) + if geo_id == 
state_0: + assert vals_1[row_idx] == 0, ( + f"Clone 1 (state {state_1}) should be zero " + f"for state {state_0} target row {row_idx}" + ) + elif geo_id == state_1: + assert vals_0[row_idx] == 0, ( + f"Clone 0 (state {state_0}) should be zero " + f"for state {state_1} target row {row_idx}" + ) + + +class TestDistrictMasking: + def test_clone_visible_only_to_own_cd(self, matrix_result): + X = matrix_result["X"] + targets_df = matrix_result["targets_df"] + geography = matrix_result["geography"] + n_records = matrix_result["n_records"] + + col_0 = _clone_col(n_records, 0, RECORD_IDX) + cd_0 = str(geography.cd_geoid[col_0]) + state_0 = str(int(geography.state_fips[col_0])) + + district_targets = targets_df[targets_df["geo_level"] == "district"] + X_csc = X.tocsc() + vals_0 = X_csc[:, col_0].toarray().ravel() + + same_state_other_cd = district_targets[ + ( + district_targets["geographic_id"].apply( + lambda g: g.startswith(state_0) + ) + ) + & (district_targets["geographic_id"] != cd_0) + ] + + for _, row in same_state_other_cd.iterrows(): + row_idx = row.name + assert vals_0[row_idx] == 0, ( + f"Clone 0 (CD {cd_0}) should be zero for " + f"CD {row['geographic_id']} target row {row_idx}" + ) + + def test_clone_nonzero_for_own_cd(self, matrix_result): + X = matrix_result["X"] + targets_df = matrix_result["targets_df"] + geography = matrix_result["geography"] + n_records = matrix_result["n_records"] + + col_0 = _clone_col(n_records, 0, RECORD_IDX) + cd_0 = str(geography.cd_geoid[col_0]) + + own_cd_targets = targets_df[ + (targets_df["geo_level"] == "district") + & (targets_df["geographic_id"] == cd_0) + ] + if len(own_cd_targets) == 0: + pytest.skip(f"No district targets for CD {cd_0}") + + X_csc = X.tocsc() + vals_0 = X_csc[:, col_0].toarray().ravel() + + any_nonzero = any( + vals_0[row.name] != 0 for _, row in own_cd_targets.iterrows() + ) + assert any_nonzero, ( + f"Clone 0 should have at least one non-zero entry " + f"for its own CD {cd_0}" + ) diff --git 
a/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py b/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py new file mode 100644 index 000000000..0ba330549 --- /dev/null +++ b/policyengine_us_data/tests/test_calibration/test_clone_and_assign.py @@ -0,0 +1,189 @@ +"""Tests for clone_and_assign module. + +Uses mock CSV data so tests don't require the real +block_cd_distributions.csv.gz file. +""" + +import numpy as np +import pandas as pd +import pytest +from unittest.mock import patch + +from policyengine_us_data.calibration.clone_and_assign import ( + GeographyAssignment, + load_global_block_distribution, + assign_random_geography, + double_geography_for_puf, +) + +MOCK_BLOCKS = pd.DataFrame( + { + "cd_geoid": [101, 101, 101, 102, 102, 103, 103, 103, 103], + "block_geoid": [ + "010010001001001", + "010010001001002", + "010010001001003", + "020010001001001", + "020010001001002", + "360100001001001", + "360100001001002", + "360100001001003", + "360100001001004", + ], + "probability": [ + 0.4, + 0.3, + 0.3, + 0.6, + 0.4, + 0.25, + 0.25, + 0.25, + 0.25, + ], + } +) + + +@pytest.fixture(autouse=True) +def _clear_lru_cache(): + load_global_block_distribution.cache_clear() + yield + load_global_block_distribution.cache_clear() + + +def _mock_distribution(): + blocks = MOCK_BLOCKS["block_geoid"].values + cds = MOCK_BLOCKS["cd_geoid"].astype(str).values + states = np.array([int(b[:2]) for b in blocks]) + probs = MOCK_BLOCKS["probability"].values.astype(np.float64) + probs = probs / probs.sum() + return blocks, cds, states, probs + + +class TestLoadGlobalBlockDistribution: + def test_loads_and_normalizes(self, tmp_path): + csv_path = tmp_path / "block_cd_distributions.csv.gz" + MOCK_BLOCKS.to_csv(csv_path, index=False, compression="gzip") + with patch( + "policyengine_us_data.calibration" + ".clone_and_assign.STORAGE_FOLDER", + tmp_path, + ): + blocks, cds, states, probs = ( + load_global_block_distribution.__wrapped__() + ) + assert 
len(blocks) == 9 + np.testing.assert_almost_equal(probs.sum(), 1.0) + + def test_state_fips_extracted(self, tmp_path): + csv_path = tmp_path / "block_cd_distributions.csv.gz" + MOCK_BLOCKS.to_csv(csv_path, index=False, compression="gzip") + with patch( + "policyengine_us_data.calibration" + ".clone_and_assign.STORAGE_FOLDER", + tmp_path, + ): + _, _, states, _ = load_global_block_distribution.__wrapped__() + assert states[0] == 1 + assert states[3] == 2 + assert states[5] == 36 + + +class TestAssignRandomGeography: + @patch( + "policyengine_us_data.calibration.clone_and_assign" + ".load_global_block_distribution" + ) + def test_shape(self, mock_load): + mock_load.return_value = _mock_distribution() + r = assign_random_geography(n_records=10, n_clones=3, seed=42) + assert len(r.block_geoid) == 30 + assert r.n_records == 10 + assert r.n_clones == 3 + + @patch( + "policyengine_us_data.calibration.clone_and_assign" + ".load_global_block_distribution" + ) + def test_deterministic(self, mock_load): + mock_load.return_value = _mock_distribution() + r1 = assign_random_geography(n_records=10, n_clones=3, seed=99) + r2 = assign_random_geography(n_records=10, n_clones=3, seed=99) + np.testing.assert_array_equal(r1.block_geoid, r2.block_geoid) + + @patch( + "policyengine_us_data.calibration.clone_and_assign" + ".load_global_block_distribution" + ) + def test_different_seeds_differ(self, mock_load): + mock_load.return_value = _mock_distribution() + r1 = assign_random_geography(n_records=100, n_clones=3, seed=1) + r2 = assign_random_geography(n_records=100, n_clones=3, seed=2) + assert not np.array_equal(r1.block_geoid, r2.block_geoid) + + @patch( + "policyengine_us_data.calibration.clone_and_assign" + ".load_global_block_distribution" + ) + def test_state_from_block(self, mock_load): + mock_load.return_value = _mock_distribution() + r = assign_random_geography(n_records=20, n_clones=5, seed=42) + for i in range(len(r.block_geoid)): + expected = int(r.block_geoid[i][:2]) + 
assert r.state_fips[i] == expected + + def test_missing_file_raises(self, tmp_path): + fake = tmp_path / "nonexistent" + fake.mkdir() + with patch( + "policyengine_us_data.calibration" + ".clone_and_assign.STORAGE_FOLDER", + fake, + ): + with pytest.raises(FileNotFoundError): + load_global_block_distribution.__wrapped__() + + +class TestDoubleGeographyForPuf: + def test_doubles_n_records(self): + geo = GeographyAssignment( + block_geoid=np.array(["010010001001001", "020010001001001"] * 3), + cd_geoid=np.array(["101", "202"] * 3), + state_fips=np.array([1, 2] * 3), + n_records=2, + n_clones=3, + ) + r = double_geography_for_puf(geo) + assert r.n_records == 4 + assert r.n_clones == 3 + assert len(r.block_geoid) == 12 + + def test_puf_half_matches_cps_half(self): + geo = GeographyAssignment( + block_geoid=np.array( + [ + "010010001001001", + "020010001001001", + "360100001001001", + "060100001001001", + "480100001001001", + "120100001001001", + ] + ), + cd_geoid=np.array(["101", "202", "1036", "653", "4831", "1227"]), + state_fips=np.array([1, 2, 36, 6, 48, 12]), + n_records=3, + n_clones=2, + ) + r = double_geography_for_puf(geo) + n_new = r.n_records + + for c in range(r.n_clones): + start = c * n_new + mid = start + n_new // 2 + end = start + n_new + np.testing.assert_array_equal( + r.state_fips[start:mid], + r.state_fips[mid:end], + ) diff --git a/policyengine_us_data/tests/test_calibration/test_drop_target_groups.py b/policyengine_us_data/tests/test_calibration/test_drop_target_groups.py new file mode 100644 index 000000000..daade621d --- /dev/null +++ b/policyengine_us_data/tests/test_calibration/test_drop_target_groups.py @@ -0,0 +1,142 @@ +"""Tests for drop_target_groups in calibration_utils.""" + +import numpy as np +import pandas as pd +import pytest +from scipy import sparse + +from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( + drop_target_groups, + create_target_groups, +) + + +@pytest.fixture +def sample_data(): + 
targets_df = pd.DataFrame( + { + "variable": [ + "snap", + "snap", + "snap", + "household_count", + "household_count", + ], + "domain_variable": [ + "snap", + "snap", + "snap", + "snap", + "snap", + ], + "geographic_id": ["US", "6", "37", "6", "37"], + "value": [1000, 500, 300, 200, 100], + } + ) + n_rows = len(targets_df) + n_cols = 10 + rng = np.random.default_rng(42) + X = sparse.random(n_rows, n_cols, density=0.5, random_state=rng) + X = X.tocsr() + target_groups, group_info = create_target_groups(targets_df) + return targets_df, X, target_groups, group_info + + +class TestDropTargetGroups: + def test_drops_matching_group(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + n_before = len(targets_df) + out_df, out_X = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("household count", "State")], + ) + assert len(out_df) < n_before + assert out_X.shape[0] == len(out_df) + assert "household_count" not in out_df["variable"].values or not ( + out_df[out_df["variable"] == "household_count"]["geographic_id"] + .isin(["6", "37"]) + .any() + ) + + def test_keeps_unmatched_groups(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + out_df, out_X = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("household count", "State")], + ) + assert "snap" in out_df["variable"].values + + def test_matrix_rows_match_df(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + out_df, out_X = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("snap", "National")], + ) + assert out_X.shape[0] == len(out_df) + assert out_X.shape[1] == X.shape[1] + + def test_no_match_keeps_all(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + out_df, out_X = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("nonexistent", "National")], + ) + assert len(out_df) == len(targets_df) + assert out_X.shape[0] == 
X.shape[0] + + def test_drop_all_groups(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + out_df, out_X = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [ + ("snap", "National"), + ("snap", "State"), + ("household count", "State"), + ], + ) + assert len(out_df) == 0 + assert out_X.shape[0] == 0 + + def test_columns_preserved(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + out_df, out_X = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("snap", "National")], + ) + assert out_X.shape[1] == X.shape[1] + + def test_case_insensitive_match(self, sample_data): + targets_df, X, target_groups, group_info = sample_data + out_df, _ = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("SNAP", "State")], + ) + out_df2, _ = drop_target_groups( + targets_df, + X, + target_groups, + group_info, + [("snap", "State")], + ) + assert len(out_df) == len(out_df2) diff --git a/policyengine_us_data/tests/test_calibration/test_unified_calibration.py b/policyengine_us_data/tests/test_calibration/test_unified_calibration.py new file mode 100644 index 000000000..2d3f80619 --- /dev/null +++ b/policyengine_us_data/tests/test_calibration/test_unified_calibration.py @@ -0,0 +1,87 @@ +"""Tests for unified_calibration module. + +Focuses on rerandomize_takeup: verifies draws differ by +block and are reproducible within the same block. 
+""" + +import numpy as np +import pytest + +from policyengine_us_data.utils.randomness import seeded_rng + + +class TestRerandomizeTakeupSeeding: + """Verify seeded_rng(var, salt=block) produces + reproducible, block-dependent draws.""" + + def test_same_block_same_draws(self): + var = "takes_up_snap_if_eligible" + block = "010010001001001" + rng1 = seeded_rng(var, salt=block) + rng2 = seeded_rng(var, salt=block) + draws1 = rng1.random(100) + draws2 = rng2.random(100) + np.testing.assert_array_equal(draws1, draws2) + + def test_different_blocks_different_draws(self): + var = "takes_up_snap_if_eligible" + rng1 = seeded_rng(var, salt="010010001001001") + rng2 = seeded_rng(var, salt="020010001001001") + draws1 = rng1.random(100) + draws2 = rng2.random(100) + assert not np.array_equal(draws1, draws2) + + def test_different_vars_different_draws(self): + block = "010010001001001" + rng1 = seeded_rng("takes_up_snap_if_eligible", salt=block) + rng2 = seeded_rng("takes_up_aca_if_eligible", salt=block) + draws1 = rng1.random(100) + draws2 = rng2.random(100) + assert not np.array_equal(draws1, draws2) + + def test_draws_in_unit_interval(self): + rng = seeded_rng( + "takes_up_snap_if_eligible", + salt="010010001001001", + ) + draws = rng.random(10000) + assert draws.min() >= 0.0 + assert draws.max() < 1.0 + + def test_rate_comparison_produces_booleans(self): + rng = seeded_rng( + "takes_up_snap_if_eligible", + salt="010010001001001", + ) + draws = rng.random(10000) + rate = 0.75 + result = draws < rate + assert result.dtype == bool + frac = result.mean() + assert 0.70 < frac < 0.80 + + +class TestSimpleTakeupConfig: + """Verify the SIMPLE_TAKEUP_VARS config is well-formed.""" + + def test_all_entries_have_required_keys(self): + from policyengine_us_data.calibration.unified_calibration import ( + SIMPLE_TAKEUP_VARS, + ) + + for entry in SIMPLE_TAKEUP_VARS: + assert "variable" in entry + assert "entity" in entry + assert "rate_key" in entry + assert entry["entity"] in ( + 
"person", + "tax_unit", + "spm_unit", + ) + + def test_expected_count(self): + from policyengine_us_data.calibration.unified_calibration import ( + SIMPLE_TAKEUP_VARS, + ) + + assert len(SIMPLE_TAKEUP_VARS) == 8 diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_hierarchical_uprating.py b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py similarity index 53% rename from policyengine_us_data/tests/test_local_area_calibration/test_hierarchical_uprating.py rename to policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py index 918e6ac86..ea2d49c5c 100644 --- a/policyengine_us_data/tests/test_local_area_calibration/test_hierarchical_uprating.py +++ b/policyengine_us_data/tests/test_calibration/test_unified_matrix_builder.py @@ -1,15 +1,18 @@ -""" -Tests for hierarchical uprating and CD reconciliation. +"""Tests for UnifiedMatrixBuilder. + +Ports uprating/hierarchical tests from test_hierarchical_uprating.py. +Uses in-memory SQLite DBs, self-contained. 
""" import unittest import tempfile import os + import pandas as pd from sqlalchemy import create_engine, text -from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import ( - SparseMatrixBuilder, +from policyengine_us_data.calibration.unified_matrix_builder import ( + UnifiedMatrixBuilder, ) from policyengine_us_data.db.create_database_tables import ( TARGET_OVERVIEW_VIEW, @@ -17,13 +20,18 @@ def _create_test_db(db_path): - """Create test DB with target_overview view and sample data.""" db_uri = f"sqlite:///{db_path}" engine = create_engine(db_uri) with engine.connect() as conn: conn.execute( - text("CREATE TABLE strata (" "stratum_id INTEGER PRIMARY KEY)") + text( + "CREATE TABLE strata (" + "stratum_id INTEGER PRIMARY KEY, " + "definition_hash VARCHAR(64), " + "parent_stratum_id INTEGER, " + "notes VARCHAR)" + ) ) conn.execute( text( @@ -46,7 +54,6 @@ def _create_test_db(db_path): "active INTEGER DEFAULT 1)" ) ) - conn.execute(text(TARGET_OVERVIEW_VIEW)) conn.commit() @@ -54,51 +61,37 @@ def _create_test_db(db_path): def _insert_aca_ptc_data(engine): - """Insert ACA PTC test data at national/state/district levels. - - State 6 (CA): 3 CDs (601, 602, 603) - State 37 (NC): 2 CDs (3701, 3702) - - All IRS data at period=2022. - One CMS national person_count at period=2024. 
- """ with engine.connect() as conn: - # Strata: national(1), state CA(2), state NC(3), - # CDs: 601(4), 602(5), 603(6), 3701(7), 3702(8) - # CMS national(9) strata = [1, 2, 3, 4, 5, 6, 7, 8, 9] for sid in strata: conn.execute( - text("INSERT INTO strata VALUES (:sid)"), - {"sid": sid}, + text( + "INSERT INTO strata " + "(stratum_id, parent_stratum_id) " + "VALUES (:sid, :parent)" + ), + { + "sid": sid, + "parent": None if sid == 1 else 1, + }, ) - # Constraints constraints = [ - # National: aca_ptc > 0 (1, 1, "aca_ptc", ">", "0"), - # State CA: aca_ptc > 0, state_fips=6 (2, 2, "aca_ptc", ">", "0"), (3, 2, "state_fips", "=", "6"), - # State NC: aca_ptc > 0, state_fips=37 (4, 3, "aca_ptc", ">", "0"), (5, 3, "state_fips", "=", "37"), - # CD 601 (6, 4, "aca_ptc", ">", "0"), (7, 4, "congressional_district_geoid", "=", "601"), - # CD 602 (8, 5, "aca_ptc", ">", "0"), (9, 5, "congressional_district_geoid", "=", "602"), - # CD 603 (10, 6, "aca_ptc", ">", "0"), (11, 6, "congressional_district_geoid", "=", "603"), - # CD 3701 (12, 7, "aca_ptc", ">", "0"), (13, 7, "congressional_district_geoid", "=", "3701"), - # CD 3702 (14, 8, "aca_ptc", ">", "0"), (15, 8, "congressional_district_geoid", "=", "3702"), - # CMS national: aca_ptc > 0 (16, 9, "aca_ptc", ">", "0"), ] for cid, sid, var, op, val in constraints: @@ -116,48 +109,31 @@ def _insert_aca_ptc_data(engine): }, ) - # Targets targets = [ - # National aca_ptc 2022 (1, 1, "aca_ptc", 10000.0, 2022), - # National tax_unit_count 2022 (2, 1, "tax_unit_count", 500.0, 2022), - # State CA aca_ptc 2022: 6000 (3, 2, "aca_ptc", 6000.0, 2022), - # State CA tax_unit_count 2022: 300 (4, 2, "tax_unit_count", 300.0, 2022), - # State NC aca_ptc 2022: 4000 (5, 3, "aca_ptc", 4000.0, 2022), - # State NC tax_unit_count 2022: 200 (6, 3, "tax_unit_count", 200.0, 2022), - # CD 601 aca_ptc 2022: 2000 (7, 4, "aca_ptc", 2000.0, 2022), - # CD 602 aca_ptc 2022: 2500 (8, 5, "aca_ptc", 2500.0, 2022), - # CD 603 aca_ptc 2022: 1500 (9, 6, "aca_ptc", 1500.0, 
2022), - # CD 601 tax_unit_count 2022: 100 (10, 4, "tax_unit_count", 100.0, 2022), - # CD 602 tax_unit_count 2022: 120 (11, 5, "tax_unit_count", 120.0, 2022), - # CD 603 tax_unit_count 2022: 80 (12, 6, "tax_unit_count", 80.0, 2022), - # CD 3701 aca_ptc 2022: 2200 (13, 7, "aca_ptc", 2200.0, 2022), - # CD 3702 aca_ptc 2022: 1800 (14, 8, "aca_ptc", 1800.0, 2022), - # CD 3701 tax_unit_count 2022: 110 (15, 7, "tax_unit_count", 110.0, 2022), - # CD 3702 tax_unit_count 2022: 90 (16, 8, "tax_unit_count", 90.0, 2022), - # CMS national person_count 2024 (17, 9, "person_count", 19743689.0, 2024), ] for tid, sid, var, val, period in targets: conn.execute( text( "INSERT INTO targets " - "VALUES (:tid, :sid, :var, :val, :period, 1)" + "VALUES (:tid, :sid, :var, :val, " + ":period, 1)" ), { "tid": tid, @@ -170,9 +146,7 @@ def _insert_aca_ptc_data(engine): conn.commit() -class TestQueryTargetsOverview(unittest.TestCase): - """Test _query_targets with target_overview view.""" - +class TestQueryTargets(unittest.TestCase): @classmethod def setUpClass(cls): cls.temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False) @@ -186,57 +160,46 @@ def tearDownClass(cls): os.unlink(cls.db_path) def _make_builder(self, time_period=2024): - return SparseMatrixBuilder( + return UnifiedMatrixBuilder( db_uri=self.db_uri, time_period=time_period, - cds_to_calibrate=["601", "602", "603", "3701", "3702"], ) def test_domain_variables_filter(self): - builder = self._make_builder() - df = builder._query_targets({"domain_variables": ["aca_ptc"]}) + b = self._make_builder() + df = b._query_targets({"domain_variables": ["aca_ptc"]}) self.assertGreater(len(df), 0) self.assertIn("geo_level", df.columns) self.assertIn("geographic_id", df.columns) self.assertIn("domain_variable", df.columns) def test_all_geo_levels_returned(self): - builder = self._make_builder() - df = builder._query_targets({"domain_variables": ["aca_ptc"]}) + b = self._make_builder() + df = b._query_targets({"domain_variables": 
["aca_ptc"]}) geo_levels = set(df["geo_level"].unique()) self.assertEqual(geo_levels, {"national", "state", "district"}) def test_best_period_selection(self): - """All aca_ptc targets at 2022, CMS at 2024.""" - builder = self._make_builder(time_period=2024) - df = builder._query_targets({"domain_variables": ["aca_ptc"]}) - aca_rows = df[df["variable"] == "aca_ptc"] - self.assertTrue((aca_rows["period"] == 2022).all()) - - cms_rows = df[df["variable"] == "person_count"] - self.assertEqual(len(cms_rows), 1) - self.assertEqual(cms_rows.iloc[0]["period"], 2024) + b = self._make_builder(time_period=2024) + df = b._query_targets({"domain_variables": ["aca_ptc"]}) + aca = df[df["variable"] == "aca_ptc"] + self.assertTrue((aca["period"] == 2022).all()) + cms = df[df["variable"] == "person_count"] + self.assertEqual(len(cms), 1) + self.assertEqual(cms.iloc[0]["period"], 2024) def test_geographic_id_populated(self): - builder = self._make_builder() - df = builder._query_targets({"domain_variables": ["aca_ptc"]}) + b = self._make_builder() + df = b._query_targets({"domain_variables": ["aca_ptc"]}) national = df[df["geo_level"] == "national"] self.assertTrue((national["geographic_id"] == "US").all()) - state_ca = df[ (df["geo_level"] == "state") & (df["geographic_id"] == "6") ] self.assertGreater(len(state_ca), 0) - district_601 = df[ - (df["geo_level"] == "district") & (df["geographic_id"] == "601") - ] - self.assertGreater(len(district_601), 0) - class TestHierarchicalUprating(unittest.TestCase): - """Test _apply_hierarchical_uprating logic.""" - @classmethod def setUpClass(cls): cls.temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False) @@ -250,141 +213,91 @@ def tearDownClass(cls): os.unlink(cls.db_path) def _make_builder(self, time_period=2024): - return SparseMatrixBuilder( + return UnifiedMatrixBuilder( db_uri=self.db_uri, time_period=time_period, - cds_to_calibrate=["601", "602", "603", "3701", "3702"], ) def _get_targets_with_uprating(self, cpi_factor=1.1, 
pop_factor=1.02): - builder = self._make_builder(time_period=2024) - df = builder._query_targets({"domain_variables": ["aca_ptc"]}) + b = self._make_builder(time_period=2024) + df = b._query_targets({"domain_variables": ["aca_ptc"]}) factors = { (2022, "cpi"): cpi_factor, (2022, "pop"): pop_factor, } df["original_value"] = df["value"].copy() df["uprating_factor"] = df.apply( - lambda row: builder._get_uprating_info( + lambda row: b._get_uprating_info( row["variable"], row["period"], factors )[0], axis=1, ) df["value"] = df["original_value"] * df["uprating_factor"] - return builder, df, factors + return b, df, factors - def test_cd_sums_match_uprated_state_totals(self): - """After reconciliation, CD sums must equal state * UF.""" - builder, df, factors = self._get_targets_with_uprating( + def test_cd_sums_match_uprated_state(self): + b, df, factors = self._get_targets_with_uprating( cpi_factor=1.1, pop_factor=1.02 ) + result = b._apply_hierarchical_uprating(df, ["aca_ptc"], factors) + csv_factors = b._load_aca_ptc_factors() - result = builder._apply_hierarchical_uprating(df, ["aca_ptc"], factors) - - # Get the CSV-based uprating factors used - csv_factors = builder._load_aca_ptc_factors() - - # Expected: state_original * csv_factor - for var, state_fips, state_original in [ + for var, sf, orig in [ ("aca_ptc", 6, 6000.0), ("aca_ptc", 37, 4000.0), ("tax_unit_count", 6, 300.0), ("tax_unit_count", 37, 200.0), ]: - expected_total = state_original * csv_factors[state_fips][var] + expected = orig * csv_factors[sf][var] cd_rows = result[ (result["variable"] == var) & (result["geo_level"] == "district") & ( result["geographic_id"].apply( - lambda g, s=state_fips: ( + lambda g, s=sf: ( int(g) // 100 == s if g.isdigit() else False ) ) ) ] - cd_sum = cd_rows["value"].sum() self.assertAlmostEqual( - cd_sum, - expected_total, + cd_rows["value"].sum(), + expected, places=2, - msg=f"CD sum for {var} state {state_fips}", + msg=f"{var} state {sf}", ) def 
test_national_and_state_rows_dropped(self): - """IRS national and state rows (period!=2024) are dropped.""" - builder, df, factors = self._get_targets_with_uprating() - result = builder._apply_hierarchical_uprating(df, ["aca_ptc"], factors) - + b, df, factors = self._get_targets_with_uprating() + result = b._apply_hierarchical_uprating(df, ["aca_ptc"], factors) irs_national = result[ (result["geo_level"] == "national") & (result["period"] != 2024) ] self.assertEqual(len(irs_national), 0) - state_rows = result[result["geo_level"] == "state"] self.assertEqual(len(state_rows), 0) def test_cms_person_count_preserved(self): - """CMS national person_count (period=2024) is NOT dropped.""" - builder, df, factors = self._get_targets_with_uprating() - result = builder._apply_hierarchical_uprating(df, ["aca_ptc"], factors) - + b, df, factors = self._get_targets_with_uprating() + result = b._apply_hierarchical_uprating(df, ["aca_ptc"], factors) cms = result[ (result["variable"] == "person_count") & (result["period"] == 2024) ] self.assertEqual(len(cms), 1) self.assertAlmostEqual(cms.iloc[0]["value"], 19743689.0, places=0) - def test_hif_and_uprating_columns(self): - """Diagnostic hif and state_uprating_factor columns populated.""" - builder, df, factors = self._get_targets_with_uprating(cpi_factor=1.1) - result = builder._apply_hierarchical_uprating(df, ["aca_ptc"], factors) - - cd_aca = result[ - (result["variable"] == "aca_ptc") - & (result["geo_level"] == "district") - ] - self.assertTrue(cd_aca["hif"].notna().all()) - self.assertTrue(cd_aca["state_uprating_factor"].notna().all()) - def test_hif_is_one_when_cds_sum_to_state(self): - """HIF == 1.0 when CDs already sum to state total. - - The uprating factor now comes from the CSV (state-specific), - not from national CPI, so we just check HIF and that a - nonzero uprating factor is set. 
- """ - builder, df, factors = self._get_targets_with_uprating(cpi_factor=1.15) - result = builder._apply_hierarchical_uprating(df, ["aca_ptc"], factors) - + b, df, factors = self._get_targets_with_uprating(cpi_factor=1.15) + result = b._apply_hierarchical_uprating(df, ["aca_ptc"], factors) cd_aca = result[ (result["variable"] == "aca_ptc") & (result["geo_level"] == "district") ] for _, row in cd_aca.iterrows(): - self.assertAlmostEqual( - row["hif"], - 1.0, - places=6, - msg=( - f"CD {row['geographic_id']} HIF " - f"should be 1.0 (CDs sum to state)" - ), - ) - self.assertGreater( - row["state_uprating_factor"], - 0, - msg=( - f"CD {row['geographic_id']} should " - f"have a positive uprating factor" - ), - ) - - def test_no_data_loss_for_non_hierarchical_rows(self): - """Rows not in hierarchical_domains are untouched.""" - builder, df, factors = self._get_targets_with_uprating() + self.assertAlmostEqual(row["hif"], 1.0, places=6) - # Add a non-hierarchical row + def test_non_hierarchical_rows_untouched(self): + b, df, factors = self._get_targets_with_uprating() extra = pd.DataFrame( [ { @@ -401,20 +314,14 @@ def test_no_data_loss_for_non_hierarchical_rows(self): } ] ) - df_with_snap = pd.concat([df, extra], ignore_index=True) - - result = builder._apply_hierarchical_uprating( - df_with_snap, ["aca_ptc"], factors - ) - - snap_rows = result[result["domain_variable"] == "snap"] - self.assertEqual(len(snap_rows), 1) - self.assertEqual(snap_rows.iloc[0]["value"], 5000.0) + df2 = pd.concat([df, extra], ignore_index=True) + result = b._apply_hierarchical_uprating(df2, ["aca_ptc"], factors) + snap = result[result["domain_variable"] == "snap"] + self.assertEqual(len(snap), 1) + self.assertEqual(snap.iloc[0]["value"], 5000.0) class TestGetStateUpratingFactors(unittest.TestCase): - """Test _get_state_uprating_factors.""" - @classmethod def setUpClass(cls): cls.temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False) @@ -428,56 +335,17 @@ def tearDownClass(cls): 
os.unlink(cls.db_path) def test_aca_ptc_uses_csv_factors(self): - """aca_ptc domain loads real state-level factors from CSV.""" - builder = SparseMatrixBuilder( - db_uri=self.db_uri, - time_period=2024, - cds_to_calibrate=["601"], - ) - df = builder._query_targets({"domain_variables": ["aca_ptc"]}) - national_factors = { - (2022, "cpi"): 1.08, - (2022, "pop"): 1.015, - } + b = UnifiedMatrixBuilder(db_uri=self.db_uri, time_period=2024) + df = b._query_targets({"domain_variables": ["aca_ptc"]}) + nf = {(2022, "cpi"): 1.08, (2022, "pop"): 1.015} df["original_value"] = df["value"].copy() - result = builder._get_state_uprating_factors( - "aca_ptc", df, national_factors - ) - + result = b._get_state_uprating_factors("aca_ptc", df, nf) self.assertIn(6, result) self.assertIn(37, result) - # CA: vol_mult ~1.0554, val_mult ~1.1460 - # aca_ptc factor = vol_mult * val_mult - self.assertAlmostEqual( - result[6]["aca_ptc"], - 1.0554375137756227 * 1.1459694989106755, - places=5, - ) - # tax_unit_count factor = vol_mult only - self.assertAlmostEqual( - result[6]["tax_unit_count"], 1.0554375137756227, places=5 - ) - - # NC: vol_mult ~1.4784, val_mult ~0.9571 - self.assertAlmostEqual( - result[37]["aca_ptc"], - 1.4784049241899557 * 0.9571183533447685, - places=5, - ) - self.assertAlmostEqual( - result[37]["tax_unit_count"], 1.4784049241899557, places=5 - ) - - def test_non_aca_domain_uses_national_factors(self): - """Non-aca_ptc domains fall back to national CPI/pop factors.""" - builder = SparseMatrixBuilder( - db_uri=self.db_uri, - time_period=2024, - cds_to_calibrate=["601"], - ) - # Build a fake targets_df with domain="snap" + def test_non_aca_uses_national_factors(self): + b = UnifiedMatrixBuilder(db_uri=self.db_uri, time_period=2024) df = pd.DataFrame( [ { @@ -500,21 +368,32 @@ def test_non_aca_domain_uses_national_factors(self): }, ] ) - national_factors = { - (2022, "cpi"): 1.08, - (2022, "pop"): 1.015, - } - - result = builder._get_state_uprating_factors( - "snap", df, 
national_factors - ) - + nf = {(2022, "cpi"): 1.08, (2022, "pop"): 1.015} + result = b._get_state_uprating_factors("snap", df, nf) self.assertIn(6, result) - # snap is dollar -> CPI self.assertAlmostEqual(result[6]["snap"], 1.08) - # household_count -> pop self.assertAlmostEqual(result[6]["household_count"], 1.015) +class TestCountTargetDetection(unittest.TestCase): + def test_endswith_count(self): + count_vars = [ + "person_count", + "tax_unit_count", + "household_count", + ] + value_vars = ["snap", "aca_ptc", "income_tax"] + for v in count_vars: + self.assertTrue( + v.endswith("_count"), + f"{v} should be detected as count", + ) + for v in value_vars: + self.assertFalse( + v.endswith("_count"), + f"{v} should not be a count target", + ) + + if __name__ == "__main__": unittest.main() diff --git a/policyengine_us_data/tests/test_local_area_calibration/conftest.py b/policyengine_us_data/tests/test_local_area_calibration/conftest.py index ce36157cc..dfede8002 100644 --- a/policyengine_us_data/tests/test_local_area_calibration/conftest.py +++ b/policyengine_us_data/tests/test_local_area_calibration/conftest.py @@ -1,57 +1,8 @@ -"""Shared fixtures for local area calibration tests. - -Importantly, this file determines which variables will be included in the sparse matrix and calibrating routine. 
-""" +"""Shared fixtures for local area calibration tests.""" import pytest -import numpy as np -from sqlalchemy import create_engine, text -from policyengine_us import Microsimulation from policyengine_us_data.storage import STORAGE_FOLDER -from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import ( - SparseMatrixBuilder, -) -from policyengine_us_data.datasets.cps.local_area_calibration.matrix_tracer import ( - MatrixTracer, -) -from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( - get_calculated_variables, -) - -# Variables to test for state-level value matching (CI uses subset for speed) -# Format: (variable_name, rtol) -# variable_name as per the targets in policy_data.db -# rtol is relative tolerance for comparison -# -# NOTE: Count targets (person_count, tax_unit_count) are excluded because -# they have constraints (e.g., age>=5|age<18) that make the X_sparse values -# different from raw sim.calculate() values. Count targets are tested -# separately in test_count_targets.py with controlled mock data. 
-VARIABLES_TO_TEST = [ - ("snap", 1e-2), - ("income_tax", 1e-2), - ("eitc", 1e-2), -] - -# CI filter config - minimal subset for fast CI runs -# Tests 3 representative variables covering benefits, taxes, and credits -COMBINED_FILTER_CONFIG = { - "domain_variables": [ - "snap", - ], - "variables": [ - "snap", - "income_tax", - "eitc", - ], -} - -# Maximum allowed mismatch rate for state-level value comparison -MAX_MISMATCH_RATE = 0.02 - -# Number of samples for cell-level verification tests -N_VERIFICATION_SAMPLES = 500 @pytest.fixture(scope="module") @@ -63,92 +14,3 @@ def db_uri(): @pytest.fixture(scope="module") def dataset_path(): return str(STORAGE_FOLDER / "stratified_extended_cps_2024.h5") - - -@pytest.fixture(scope="module") -def test_cds(db_uri): - """CDs from NC, HI, MT, AK (manageable size for CI, multiple same-state CDs).""" - engine = create_engine(db_uri) - query = """ - SELECT DISTINCT sc.value as cd_geoid - FROM stratum_constraints sc - WHERE sc.constraint_variable = 'congressional_district_geoid' - AND ( - sc.value LIKE '37__' - OR sc.value LIKE '150_' - OR sc.value LIKE '300_' - OR sc.value = '200' OR sc.value = '201' - ) - ORDER BY sc.value - """ - with engine.connect() as conn: - result = conn.execute(text(query)).fetchall() - return [row[0] for row in result] - - -@pytest.fixture(scope="module") -def sim(dataset_path): - return Microsimulation(dataset=dataset_path) - - -@pytest.fixture(scope="module") -def matrix_data(db_uri, dataset_path, test_cds, sim): - """Build sparse matrix with all configured variables.""" - builder = SparseMatrixBuilder( - db_uri, - time_period=2023, - cds_to_calibrate=test_cds, - dataset_path=dataset_path, - ) - targets_df, X_sparse, household_id_mapping = builder.build_matrix( - sim, target_filter=COMBINED_FILTER_CONFIG - ) - return targets_df, X_sparse, household_id_mapping - - -@pytest.fixture(scope="module") -def targets_df(matrix_data): - return matrix_data[0] - - -@pytest.fixture(scope="module") -def 
X_sparse(matrix_data): - return matrix_data[1] - - -@pytest.fixture(scope="module") -def household_id_mapping(matrix_data): - return matrix_data[2] - - -@pytest.fixture(scope="module") -def tracer(targets_df, X_sparse, household_id_mapping, test_cds, sim): - return MatrixTracer( - targets_df, X_sparse, household_id_mapping, test_cds, sim - ) - - -@pytest.fixture(scope="module") -def n_households(tracer): - return tracer.n_households - - -@pytest.fixture(scope="module") -def household_ids(tracer): - return tracer.original_household_ids - - -@pytest.fixture(scope="module") -def household_states(sim): - return sim.calculate("state_fips", map_to="household").values - - -def create_state_simulation(dataset_path, n_households, state): - """Create simulation with all households assigned to a specific state.""" - s = Microsimulation(dataset=dataset_path) - s.set_input( - "state_fips", 2023, np.full(n_households, state, dtype=np.int32) - ) - for var in get_calculated_variables(s): - s.delete_arrays(var) - return s diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_column_indexing.py b/policyengine_us_data/tests/test_local_area_calibration/test_column_indexing.py deleted file mode 100644 index 2e23763bc..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_column_indexing.py +++ /dev/null @@ -1,47 +0,0 @@ -"""Test column indexing in sparse matrix.""" - -import pytest - - -def test_column_indexing_roundtrip(X_sparse, tracer, test_cds): - """ - Verify column index = cd_idx * n_households + household_index. - - This is pure math - if this fails, everything else is unreliable. 
- """ - n_hh = tracer.n_households - hh_ids = tracer.original_household_ids - errors = [] - - test_cases = [] - for cd_idx in [0, len(test_cds) // 2, len(test_cds) - 1]: - for hh_idx in [0, 100, n_hh - 1]: - test_cases.append((cd_idx, hh_idx)) - - for cd_idx, hh_idx in test_cases: - cd = test_cds[cd_idx] - hh_id = hh_ids[hh_idx] - expected_col = cd_idx * n_hh + hh_idx - col_info = tracer.get_column_info(expected_col) - positions = tracer.get_household_column_positions(hh_id) - pos_col = positions[cd] - - if col_info["cd_geoid"] != cd: - errors.append(f"CD mismatch at col {expected_col}") - if col_info["household_index"] != hh_idx: - errors.append(f"HH index mismatch at col {expected_col}") - if col_info["household_id"] != hh_id: - errors.append(f"HH ID mismatch at col {expected_col}") - if pos_col != expected_col: - errors.append(f"Position mismatch for hh {hh_id}, cd {cd}") - - assert not errors, f"Column indexing errors: {errors}" - - -def test_matrix_dimensions(X_sparse, tracer, test_cds): - """Verify matrix width matches expected CD x household count.""" - n_hh = tracer.n_households - expected_cols = len(test_cds) * n_hh - assert ( - X_sparse.shape[1] == expected_cols - ), f"Matrix width mismatch: expected {expected_cols}, got {X_sparse.shape[1]}" diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_count_targets.py b/policyengine_us_data/tests/test_local_area_calibration/test_count_targets.py deleted file mode 100644 index 46eae4ebb..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_count_targets.py +++ /dev/null @@ -1,415 +0,0 @@ -""" -Tests for count target handling in SparseMatrixBuilder. - -These tests verify that count targets (e.g., person_count, tax_unit_count) -are correctly handled by counting entities that satisfy constraints, rather -than summing values. 
-""" - -import pytest -import numpy as np -from dataclasses import dataclass - -from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import ( - SparseMatrixBuilder, -) - - -@dataclass -class MockEntity: - """Mock entity with a key attribute.""" - - key: str - - -@dataclass -class MockVariable: - """Mock variable with entity information.""" - - entity: MockEntity - - @classmethod - def create(cls, entity_key: str) -> "MockVariable": - return cls(entity=MockEntity(key=entity_key)) - - -class MockTaxBenefitSystem: - """Mock tax benefit system with variable definitions.""" - - def __init__(self): - self.variables = { - "person_count": MockVariable.create("person"), - "tax_unit_count": MockVariable.create("tax_unit"), - "household_count": MockVariable.create("household"), - "spm_unit_count": MockVariable.create("spm_unit"), - "snap": MockVariable.create("spm_unit"), - } - - -@dataclass -class MockCalculationResult: - """Mock result from simulation.calculate().""" - - values: np.ndarray - - -class MockSimulation: - """Mock simulation for testing count target calculations.""" - - def __init__(self, entity_data: dict, variable_values: dict): - """ - Args: - entity_data: Dict with person_id, household_id, tax_unit_id, - spm_unit_id arrays (all at person level) - variable_values: Dict mapping variable names to their values - at the appropriate entity level - """ - self.entity_data = entity_data - self.variable_values = variable_values - self.tax_benefit_system = MockTaxBenefitSystem() - - def calculate(self, variable: str, map_to: str = None): - """Return mock calculation result.""" - if variable in self.entity_data: - # Entity ID variables - if map_to == "person": - values = np.array(self.entity_data[variable]) - elif map_to == "household": - # Return unique household IDs - values = np.array( - sorted(set(self.entity_data["household_id"])) - ) - else: - values = np.array(self.entity_data[variable]) - elif variable in self.variable_values: - # 
Regular variables - return at requested level - val_data = self.variable_values[variable] - if map_to == "person": - values = np.array(val_data["person"]) - elif map_to == "household": - values = np.array(val_data["household"]) - else: - values = np.array(val_data.get("default", [])) - else: - values = np.array([]) - - return MockCalculationResult(values=values) - - -@pytest.fixture -def basic_entity_data(): - """ - Create mock entity relationships with known household compositions. - - Household 1 (id=100): 3 people (ages 5, 12, 40) -> 2 aged 5-17 - Household 2 (id=200): 2 people (ages 3, 25) -> 0 aged 5-17 - Household 3 (id=300): 4 people (ages 6, 8, 10, 45) -> 3 aged 5-17 - """ - return { - "person_id": [1, 2, 3, 4, 5, 6, 7, 8, 9], - "household_id": [100, 100, 100, 200, 200, 300, 300, 300, 300], - "tax_unit_id": [10, 10, 10, 20, 20, 30, 30, 30, 30], - "spm_unit_id": [ - 1000, - 1000, - 1000, - 2000, - 2000, - 3000, - 3000, - 3000, - 3000, - ], - } - - -@pytest.fixture -def basic_variable_values(): - """Variable values for basic household composition tests.""" - return { - "age": { - "person": [5, 12, 40, 3, 25, 6, 8, 10, 45], - "household": [40, 25, 45], # Not used for age constraints - }, - "person_count": { - "person": [1, 1, 1, 1, 1, 1, 1, 1, 1], - "household": [3, 2, 4], # Sum per household - }, - "snap": { - "person": [100, 100, 100, 0, 0, 200, 200, 200, 200], - "household": [300, 0, 800], - }, - } - - -@pytest.fixture -def basic_sim(basic_entity_data, basic_variable_values): - """Mock simulation with basic household compositions.""" - return MockSimulation(basic_entity_data, basic_variable_values) - - -@pytest.fixture -def builder(): - """Create a minimal SparseMatrixBuilder (won't use DB for unit tests).""" - return SparseMatrixBuilder( - db_uri="sqlite:///:memory:", - time_period=2023, - cds_to_calibrate=["101"], - ) - - -# Tests for basic count target calculation -class TestCountTargetCalculation: - """Test _calculate_target_values_entity_aware for 
count targets.""" - - def test_person_count_with_age_constraints(self, builder, basic_sim): - """Test person_count correctly counts persons in age range per HH.""" - # Constraints: age >= 5 AND age < 18 - constraints = [ - {"variable": "age", "operation": ">=", "value": 5}, - {"variable": "age", "operation": "<", "value": 18}, - ] - - geo_mask = np.array([True, True, True]) # All households included - n_households = 3 - - result = builder._calculate_target_values_entity_aware( - basic_sim, - "person_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1 has 2 people (ages 5, 12), HH2 has 0, HH3 has 3 (6,8,10) - expected = np.array([2, 0, 3], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_person_count_no_constraints(self, builder, basic_sim): - """Test person_count without constraints returns all persons per HH.""" - constraints = [] - geo_mask = np.array([True, True, True]) - n_households = 3 - - result = builder._calculate_target_values_entity_aware( - basic_sim, - "person_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1 has 3 people, HH2 has 2, HH3 has 4 - expected = np.array([3, 2, 4], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_person_count_with_geo_mask(self, builder, basic_sim): - """Test person_count respects geographic mask.""" - constraints = [ - {"variable": "age", "operation": ">=", "value": 5}, - {"variable": "age", "operation": "<", "value": 18}, - ] - - # Only include households 1 and 3 - geo_mask = np.array([True, False, True]) - n_households = 3 - - result = builder._calculate_target_values_entity_aware( - basic_sim, - "person_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1=2, HH2=0 (masked out), HH3=3 - expected = np.array([2, 0, 3], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_value_target_uses_sum(self, builder, basic_sim): - """Test that non-count targets sum values 
(existing behavior).""" - # SNAP is a value target, not a count target - constraints = [] - geo_mask = np.array([True, True, True]) - n_households = 3 - - result = builder._calculate_target_values_entity_aware( - basic_sim, - "snap", - constraints, - geo_mask, - n_households, - ) - - # Expected: Sum of snap values per household - expected = np.array([300, 0, 800], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_household_count_no_constraints(self, builder, basic_sim): - """Test household_count returns 1 for each qualifying household.""" - constraints = [] - geo_mask = np.array([True, True, True]) - n_households = 3 - - result = builder._calculate_target_values_entity_aware( - basic_sim, - "household_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: 1 for each household in geo_mask - expected = np.array([1, 1, 1], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_household_count_with_geo_mask(self, builder, basic_sim): - """Test household_count respects geographic mask.""" - constraints = [] - geo_mask = np.array([True, False, True]) - n_households = 3 - - result = builder._calculate_target_values_entity_aware( - basic_sim, - "household_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: 1 for HH1, 0 for HH2 (masked), 1 for HH3 - expected = np.array([1, 0, 1], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - -# Fixtures for complex entity relationship tests -@pytest.fixture -def complex_entity_data(): - """ - Create entity data with multiple tax units per household. 
- - Household 1 (id=100): 4 people in 2 tax units - Tax unit 10: person 1 (age 30, filer), person 2 (age 28) - Tax unit 11: person 3 (age 65, filer), person 4 (age 62) - Household 2 (id=200): 2 people in 1 tax unit - Tax unit 20: person 5 (age 45, filer), person 6 (age 16) - """ - return { - "person_id": [1, 2, 3, 4, 5, 6], - "household_id": [100, 100, 100, 100, 200, 200], - "tax_unit_id": [10, 10, 11, 11, 20, 20], - "spm_unit_id": [1000, 1000, 1000, 1000, 2000, 2000], - } - - -@pytest.fixture -def complex_variable_values(): - """Variable values for complex entity relationship tests.""" - return { - "age": { - "person": [30, 28, 65, 62, 45, 16], - "household": [65, 45], - }, - "is_tax_unit_head": { - "person": [True, False, True, False, True, False], - "household": [2, 1], # count of heads per HH - }, - "tax_unit_count": { - "person": [1, 1, 1, 1, 1, 1], - "household": [2, 1], - }, - "person_count": { - "person": [1, 1, 1, 1, 1, 1], - "household": [4, 2], - }, - } - - -@pytest.fixture -def complex_sim(complex_entity_data, complex_variable_values): - """Mock simulation with complex entity relationships.""" - return MockSimulation(complex_entity_data, complex_variable_values) - - -# Tests for complex entity relationships -class TestCountTargetWithRealEntities: - """Test count targets with more complex entity relationships.""" - - def test_tax_unit_count_no_constraints(self, builder, complex_sim): - """Test tax_unit_count counts all tax units per household.""" - constraints = [] - geo_mask = np.array([True, True]) - n_households = 2 - - result = builder._calculate_target_values_entity_aware( - complex_sim, - "tax_unit_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1 has 2 tax units, HH2 has 1 - expected = np.array([2, 1], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_tax_unit_count_with_age_constraint(self, builder, complex_sim): - """Test tax_unit_count with age constraint on members.""" - # Count tax units 
that have at least one person aged >= 65 - constraints = [ - {"variable": "age", "operation": ">=", "value": 65}, - ] - geo_mask = np.array([True, True]) - n_households = 2 - - result = builder._calculate_target_values_entity_aware( - complex_sim, - "tax_unit_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1 has 1 tax unit (TU 11) with person >=65, HH2 has 0 - expected = np.array([1, 0], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_person_count_seniors(self, builder, complex_sim): - """Test person_count for seniors (age >= 65).""" - constraints = [ - {"variable": "age", "operation": ">=", "value": 65}, - ] - geo_mask = np.array([True, True]) - n_households = 2 - - result = builder._calculate_target_values_entity_aware( - complex_sim, - "person_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1 has 1 senior (age 65), HH2 has 0 - expected = np.array([1, 0], dtype=np.float32) - np.testing.assert_array_equal(result, expected) - - def test_person_count_children(self, builder, complex_sim): - """Test person_count for children (age < 18).""" - constraints = [ - {"variable": "age", "operation": "<", "value": 18}, - ] - geo_mask = np.array([True, True]) - n_households = 2 - - result = builder._calculate_target_values_entity_aware( - complex_sim, - "person_count", - constraints, - geo_mask, - n_households, - ) - - # Expected: HH1 has 0 children, HH2 has 1 (age 16) - expected = np.array([0, 1], dtype=np.float32) - np.testing.assert_array_equal(result, expected) diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_cross_state.py b/policyengine_us_data/tests/test_local_area_calibration/test_cross_state.py deleted file mode 100644 index 2f44428c5..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_cross_state.py +++ /dev/null @@ -1,134 +0,0 @@ -"""Test cross-state values match state-swapped simulations.""" - -import pytest -import numpy as np -from 
collections import defaultdict - -from policyengine_us import Microsimulation -from policyengine_us_data.datasets.cps.local_area_calibration.calibration_utils import ( - get_calculated_variables, -) - -from .conftest import VARIABLES_TO_TEST, N_VERIFICATION_SAMPLES - - -@pytest.mark.skip( - reason="Sparse matrix builder not used in production; test needs rework after time_period fix" -) -def test_cross_state_matches_swapped_sim( - X_sparse, - targets_df, - test_cds, - dataset_path, - n_households, - household_ids, - household_states, -): - """ - Cross-state non-zero cells must match state-swapped simulation. - - When household moves to different state, X_sparse should contain the - value calculated from a fresh simulation with state_fips set to - destination state. - - Uses stratified sampling to ensure all variables in VARIABLES_TO_TEST - are covered with approximately equal samples per variable. - """ - seed = 42 - rng = np.random.default_rng(seed) - n_hh = n_households - hh_ids = household_ids - hh_states = household_states - - state_sims = {} - - def get_state_sim(state): - if state not in state_sims: - s = Microsimulation(dataset=dataset_path) - s.set_input( - "state_fips", 2023, np.full(n_hh, state, dtype=np.int32) - ) - for var in get_calculated_variables(s): - s.delete_arrays(var) - state_sims[state] = s - return state_sims[state] - - nonzero_rows, nonzero_cols = X_sparse.nonzero() - - # Group cross-state cells by variable for stratified sampling - variable_to_indices = defaultdict(list) - variables_to_test = {v[0] for v in VARIABLES_TO_TEST} - - for i in range(len(nonzero_rows)): - row_idx = nonzero_rows[i] - col_idx = nonzero_cols[i] - cd_idx = col_idx // n_hh - hh_idx = col_idx % n_hh - cd = test_cds[cd_idx] - dest_state = int(cd) // 100 - orig_state = int(hh_states[hh_idx]) - - # Only include cross-state cells - if dest_state == orig_state: - continue - - # Get variable for this row - variable = targets_df.iloc[row_idx]["variable"] - if variable in 
variables_to_test: - variable_to_indices[variable].append(i) - - if not variable_to_indices: - pytest.skip("No cross-state non-zero cells found for test variables") - - # Stratified sampling: sample proportionally from each variable - samples_per_var = max( - 1, N_VERIFICATION_SAMPLES // len(variable_to_indices) - ) - sample_indices = [] - - for variable, indices in variable_to_indices.items(): - n_to_sample = min(samples_per_var, len(indices)) - sampled = rng.choice(indices, n_to_sample, replace=False) - sample_indices.extend(sampled) - - errors = [] - variables_tested = set() - - for idx in sample_indices: - row_idx = nonzero_rows[idx] - col_idx = nonzero_cols[idx] - cd_idx = col_idx // n_hh - hh_idx = col_idx % n_hh - cd = test_cds[cd_idx] - dest_state = int(cd) // 100 - variable = targets_df.iloc[row_idx]["variable"] - actual = float(X_sparse[row_idx, col_idx]) - state_sim = get_state_sim(dest_state) - expected = float( - state_sim.calculate(variable, map_to="household").values[hh_idx] - ) - - variables_tested.add(variable) - - if not np.isclose(actual, expected, atol=0.5): - errors.append( - { - "hh_id": hh_ids[hh_idx], - "orig_state": int(hh_states[hh_idx]), - "dest_state": dest_state, - "variable": variable, - "actual": actual, - "expected": expected, - } - ) - - # Report which variables were tested - missing_vars = variables_to_test - variables_tested - if missing_vars: - print(f"Warning: No cross-state cells found for: {missing_vars}") - - assert not errors, ( - f"Cross-state verification failed: {len(errors)}/{len(sample_indices)} " - f"mismatches across {len(variables_tested)} variables. 
" - f"First 5: {errors[:5]}" - ) diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_geo_masking.py b/policyengine_us_data/tests/test_local_area_calibration/test_geo_masking.py deleted file mode 100644 index 9f0033733..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_geo_masking.py +++ /dev/null @@ -1,202 +0,0 @@ -"""Test geographic masking behavior in sparse matrix.""" - -import pytest -import numpy as np - - -def test_state_level_zero_masking( - X_sparse, targets_df, tracer, test_cds, n_households -): - """ - State-level targets have zeros for wrong-state CD columns. - - For a target with geographic_id=37 (NC), columns for CDs in other states - (HI, MT, AK) should all be zero. - """ - seed = 42 - rng = np.random.default_rng(seed) - n_hh = n_households - - state_targets = [] - for row_idx in range(len(targets_df)): - geo_id = targets_df.iloc[row_idx].get("geographic_id", "US") - if geo_id != "US": - try: - val = int(geo_id) - if val < 100: - state_targets.append((row_idx, val)) - except (ValueError, TypeError): - pass - - if not state_targets: - pytest.skip("No state-level targets found") - - errors = [] - checked = 0 - sample_targets = rng.choice( - len(state_targets), min(20, len(state_targets)), replace=False - ) - - for idx in sample_targets: - row_idx, target_state = state_targets[idx] - other_state_cds = [ - (i, cd) - for i, cd in enumerate(test_cds) - if int(cd) // 100 != target_state - ] - if not other_state_cds: - continue - - sample_cds = rng.choice( - len(other_state_cds), min(5, len(other_state_cds)), replace=False - ) - for cd_sample_idx in sample_cds: - cd_idx, cd = other_state_cds[cd_sample_idx] - sample_hh = rng.choice(n_hh, min(5, n_hh), replace=False) - for hh_idx in sample_hh: - col_idx = cd_idx * n_hh + hh_idx - actual = X_sparse[row_idx, col_idx] - checked += 1 - if actual != 0: - errors.append( - {"row": row_idx, "cd": cd, "value": float(actual)} - ) - - assert ( - not errors - ), f"State-level 
masking failed: {len(errors)}/{checked} should be zero" - - -def test_cd_level_zero_masking( - X_sparse, targets_df, tracer, test_cds, n_households -): - """ - CD-level targets have zeros for other CDs, even same-state. - - For a target with geographic_id=3707, columns for CDs 3701-3706, 3708-3714 - should all be zero, even though they're all in NC (state 37). - """ - seed = 42 - rng = np.random.default_rng(seed) - n_hh = n_households - - cd_targets_with_same_state = [] - for row_idx in range(len(targets_df)): - geo_id = targets_df.iloc[row_idx].get("geographic_id", "US") - if geo_id != "US": - try: - val = int(geo_id) - if val >= 100: - target_state = val // 100 - same_state_other_cds = [ - cd - for cd in test_cds - if int(cd) // 100 == target_state and cd != geo_id - ] - if same_state_other_cds: - cd_targets_with_same_state.append( - (row_idx, geo_id, same_state_other_cds) - ) - except (ValueError, TypeError): - pass - - if not cd_targets_with_same_state: - pytest.skip( - "No CD-level targets with same-state other CDs in test_cds" - ) - - errors = [] - same_state_checks = 0 - - for row_idx, target_cd, other_cds in cd_targets_with_same_state[:10]: - for cd in other_cds: - cd_idx = test_cds.index(cd) - for hh_idx in rng.choice(n_hh, 3, replace=False): - col_idx = cd_idx * n_hh + hh_idx - actual = X_sparse[row_idx, col_idx] - same_state_checks += 1 - if actual != 0: - errors.append( - { - "target_cd": target_cd, - "other_cd": cd, - "value": float(actual), - } - ) - - assert not errors, ( - f"CD-level masking failed: {len(errors)} same-state-different-CD " - f"non-zero values. First 5: {errors[:5]}" - ) - - -@pytest.mark.skip( - reason="Sparse matrix builder not used in production; test needs rework after time_period fix" -) -def test_national_no_geo_masking( - X_sparse, targets_df, tracer, sim, test_cds, dataset_path, n_households -): - """ - National targets have no geographic masking. - - National targets (geographic_id='US') can have non-zero values for ANY CD. 
- Values differ by destination state because benefits are recalculated - under each state's rules. - """ - seed = 42 - rng = np.random.default_rng(seed) - n_hh = n_households - hh_ids = tracer.original_household_ids - - national_rows = [ - i - for i in range(len(targets_df)) - if targets_df.iloc[i].get("geographic_id", "US") == "US" - ] - - if not national_rows: - pytest.skip("No national targets found") - - states_in_test = sorted(set(int(cd) // 100 for cd in test_cds)) - cds_by_state = { - state: [cd for cd in test_cds if int(cd) // 100 == state] - for state in states_in_test - } - - for row_idx in national_rows: - variable = targets_df.iloc[row_idx]["variable"] - - row_data = X_sparse.getrow(row_idx) - nonzero_cols = row_data.nonzero()[1] - - assert ( - len(nonzero_cols) > 0 - ), f"National target row {row_idx} ({variable}) has no non-zero values" - - sample_cols = rng.choice( - nonzero_cols, min(5, len(nonzero_cols)), replace=False - ) - - households_checked = 0 - households_with_multi_state_values = 0 - - for col_idx in sample_cols: - hh_idx = col_idx % n_hh - - values_by_state = {} - for state, cds in cds_by_state.items(): - cd = cds[0] - cd_idx = test_cds.index(cd) - state_col = cd_idx * n_hh + hh_idx - val = float(X_sparse[row_idx, state_col]) - if val != 0: - values_by_state[state] = val - - households_checked += 1 - if len(values_by_state) > 1: - households_with_multi_state_values += 1 - - assert households_with_multi_state_values > 0, ( - f"National target {variable}: no households have values in " - f"multiple states" - ) diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_matrix_national_variation.py b/policyengine_us_data/tests/test_local_area_calibration/test_matrix_national_variation.py deleted file mode 100644 index 53760834c..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_matrix_national_variation.py +++ /dev/null @@ -1,488 +0,0 @@ -""" -Tests for correctness in the sparse matrix builder, 
particularly for national level contributions. - -These tests verify that: -1. Matrix shape and structure are correct -2. Variable aggregation (person to household) preserves totals -3. National-level targets receive contributions from all states (no geographic - bias) -4. Cross-state recalculation applies state-specific rules -""" - -import pytest -import numpy as np -import pandas as pd -from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import ( - SparseMatrixBuilder, -) - -from .conftest import ( - VARIABLES_TO_TEST, - COMBINED_FILTER_CONFIG, -) - -# Variables with state-specific variation (e.g., SNAP eligibility) -VARIABLES_WITH_STATE_VARIATION = [ - "snap", -] - - -@pytest.fixture(scope="module") -def builder(db_uri, dataset_path, test_cds): - """SparseMatrixBuilder configured with test CDs.""" - return SparseMatrixBuilder( - db_uri=db_uri, - time_period=2023, - cds_to_calibrate=test_cds, - dataset_path=dataset_path, - ) - - -def _get_geo_level(geo_id) -> str: - """Determine geographic level from geographic_id.""" - if geo_id == "US": - return "national" - try: - val = int(geo_id) - if 1 <= val <= 56: - return "state" - else: - return "district" - except (ValueError, TypeError): - return "unknown" - - -def test_person_level_aggregation_preserves_totals(sim): - """Health insurance premiums (person-level) sum correctly to household.""" - var = "health_insurance_premiums_without_medicare_part_b" - person_total = sim.calculate(var, 2023, map_to="person").values.sum() - household_total = sim.calculate(var, 2023, map_to="household").values.sum() - assert np.isclose(person_total, household_total, rtol=1e-6) - - -def test_matrix_shape(sim, builder): - """Matrix should have (n_targets, n_households * n_cds) shape.""" - targets_df, X_sparse, _ = builder.build_matrix( - sim, - target_filter={ - "variables": ["health_insurance_premiums_without_medicare_part_b"] - }, - ) - n_households = len( - sim.calculate("household_id", 
map_to="household").values - ) - n_cds = len(builder.cds_to_calibrate) - assert X_sparse.shape[1] == n_households * n_cds - - -def test_combined_variables_in_matrix(sim, builder): - """Matrix should include all configured variables.""" - targets_df, X_sparse, _ = builder.build_matrix( - sim, - target_filter=COMBINED_FILTER_CONFIG, - ) - variables = targets_df["variable"].unique() - - for var_name, _ in VARIABLES_TO_TEST: - assert var_name in variables, f"Missing variable: {var_name}" - - -class TestNationalLevelContributions: - """ - Tests verifying that national-level targets receive contributions from - households across all states, not just a geographic subset. - - The key insight: for a national target, when we look at a single CD's - column block, households from ALL original states should potentially - contribute (subject to meeting eligibility constraints). There should - be no systematic geographic bias where only households from certain - states contribute to the national total. - """ - - def test_national_targets_receive_multistate_contributions( - self, targets_df, X_sparse, household_states, n_households, test_cds - ): - """ - Verify that national-level targets have contributions from households - originally from multiple states. - - For each national target: - 1. Look at the matrix row - 2. For EACH CD's column block, identify which original states have - non-zero contributions - 3. 
Verify contributions come from multiple states (not geographically - biased) - """ - state_fips = household_states - cds = test_cds - - # Find national-level targets - national_targets = targets_df[ - targets_df["geographic_id"].apply( - lambda x: _get_geo_level(x) == "national" - ) - ] - - if len(national_targets) == 0: - pytest.skip("No national-level targets found") - - results = [] - - for _, target in national_targets.iterrows(): - row_idx = target.name - variable = target["variable"] - row = X_sparse[row_idx, :].toarray().flatten() - - # For each CD block, check which original states contribute - cd_contribution_stats = [] - - for cd_idx, cd in enumerate(cds): - col_start = cd_idx * n_households - col_end = col_start + n_households - cd_values = row[col_start:col_end] - - # Find households with non-zero values in this CD block - nonzero_mask = cd_values != 0 - nonzero_indices = np.where(nonzero_mask)[0] - - if len(nonzero_indices) == 0: - continue - - # Get original states of contributing households - contributing_states = set(state_fips[nonzero_indices]) - - cd_contribution_stats.append( - { - "cd": cd, - "cd_state": int(cd) // 100, - "n_contributing": len(nonzero_indices), - "n_states": len(contributing_states), - "contributing_states": contributing_states, - } - ) - - if not cd_contribution_stats: - results.append( - { - "variable": variable, - "status": "NO_CONTRIBUTIONS", - "details": "No non-zero values in any CD block", - } - ) - continue - - # Aggregate stats - stats_df = pd.DataFrame(cd_contribution_stats) - avg_states = stats_df["n_states"].mean() - min_states = stats_df["n_states"].min() - - # Check: on average, contributions should come from multiple states - # (at least 2, since we have CDs from 4 different states) - passed = avg_states >= 2 and min_states >= 1 - - results.append( - { - "variable": variable, - "status": "PASSED" if passed else "FAILED", - "avg_contributing_states": avg_states, - "min_contributing_states": min_states, - 
"n_cd_blocks_with_data": len(stats_df), - } - ) - - # Assert no geographic bias - failed = [r for r in results if r["status"] == "FAILED"] - assert len(failed) == 0, ( - f"Geographic bias detected in national targets: " - f"{[r['variable'] for r in failed]}" - ) - - def test_state_distribution_in_national_targets( - self, targets_df, X_sparse, household_states, n_households, test_cds - ): - """ - Verify the distribution of contributing states in national targets - roughly matches the original data distribution. - - This catches cases where one state dominates the contributions - disproportionately. - """ - state_fips = household_states - cds = test_cds - - # Get original state distribution (count of households per state) - unique_states, original_counts = np.unique( - state_fips, return_counts=True - ) - original_dist = dict(zip(unique_states, original_counts)) - total_hh = len(state_fips) - - # Find national-level targets - national_targets = targets_df[ - targets_df["geographic_id"].apply( - lambda x: _get_geo_level(x) == "national" - ) - ] - - if len(national_targets) == 0: - pytest.skip("No national-level targets found") - - for _, target in national_targets.iterrows(): - row_idx = target.name - variable = target["variable"] - row = X_sparse[row_idx, :].toarray().flatten() - - # Count contributions by original state across ALL CD blocks - state_contribution_counts = {} - - for cd_idx, cd in enumerate(cds): - col_start = cd_idx * n_households - col_end = col_start + n_households - cd_values = row[col_start:col_end] - - nonzero_mask = cd_values != 0 - nonzero_indices = np.where(nonzero_mask)[0] - - for hh_idx in nonzero_indices: - orig_state = state_fips[hh_idx] - state_contribution_counts[orig_state] = ( - state_contribution_counts.get(orig_state, 0) + 1 - ) - - if not state_contribution_counts: - continue - - # Check that no single state dominates excessively - total_contributions = sum(state_contribution_counts.values()) - max_contribution = 
max(state_contribution_counts.values()) - max_state = max( - state_contribution_counts, key=state_contribution_counts.get - ) - max_share = max_contribution / total_contributions - - # The max share should not exceed 70% (unless that state has 70%+ - # of households in the original data) - original_max_share = original_dist.get(max_state, 0) / total_hh - - # Allow 20% margin above original share - threshold = min(0.7, original_max_share + 0.2) - - assert max_share <= threshold, ( - f"State {max_state} dominates national {variable} target with " - f"{max_share:.1%} of contributions " - f"(original share: {original_max_share:.1%})" - ) - - -@pytest.mark.skip( - reason="Sparse matrix builder not used in production; test needs rework after time_period fix" -) -class TestCrossStateRecalculation: - """ - Tests verifying that household values change when borrowed to different - states, confirming state-specific rules are being applied. - - The key insight: for national-level targets (no state constraint), each - household appears in every CD block. The value in each CD block represents - what the variable would be if that household lived in that CD's state. - For state-dependent variables (like SNAP), values should differ across - states for at least some households. - - NOTE: This complements test_cross_state.py which verifies exact values. - These tests verify that variation exists (state rules are applied). - """ - - def test_values_change_across_states_for_national_targets( - self, targets_df, X_sparse, n_households, test_cds - ): - """ - Verify that for national targets, household values vary across CD - blocks from different states. - - This confirms the matrix builder is correctly recalculating variables - with state-specific rules when households are "borrowed" to different - geographic areas. - - The test checks: - 1. For each national target, examine households with non-zero values - 2. Compare each household's value across CD blocks from different states - 3. 
At least some households should have different values in different - states (confirming recalculation with different state rules) - """ - cds = test_cds - - # Group CDs by state - cds_by_state = {} - for cd_idx, cd in enumerate(cds): - state = int(cd) // 100 - if state not in cds_by_state: - cds_by_state[state] = [] - cds_by_state[state].append((cd_idx, cd)) - - states = list(cds_by_state.keys()) - if len(states) < 2: - pytest.skip("Need at least 2 states to test cross-state variation") - - # Find national-level targets - national_targets = targets_df[ - targets_df["geographic_id"].apply( - lambda x: _get_geo_level(x) == "national" - ) - ] - - if len(national_targets) == 0: - pytest.skip("No national-level targets found") - - results = [] - - for _, target in national_targets.iterrows(): - if target["variable"] not in VARIABLES_WITH_STATE_VARIATION: - continue - row_idx = target.name - variable = target["variable"] - row = X_sparse[row_idx, :].toarray().flatten() - - # For each household, collect values from different states - households_with_variation = 0 - households_checked = 0 - - # Sample households (check every 10th to keep test fast) - for hh_idx in range(0, n_households, 10): - # Get this household's value in each state (use first CD of - # each state) - state_values = {} - for state, cd_list in cds_by_state.items(): - cd_idx, _ = cd_list[0] # First CD in this state - col_idx = cd_idx * n_households + hh_idx - state_values[state] = row[col_idx] - - # Skip if all values are zero (household doesn't qualify for - # this variable) - nonzero_values = [v for v in state_values.values() if v != 0] - if len(nonzero_values) < 2: - continue - - households_checked += 1 - - # Check if values differ across states - unique_values = set(nonzero_values) - if len(unique_values) > 1: - households_with_variation += 1 - - variation_rate = ( - households_with_variation / households_checked - if households_checked > 0 - else 0 - ) - - results.append( - { - "variable": variable, - 
"households_checked": households_checked, - "households_with_variation": households_with_variation, - "variation_rate": variation_rate, - } - ) - - # For state-dependent variables, we expect SOME variation - # (not all households will vary - some may have $0 or max benefits - # regardless of state) - # The key is that variation exists, confirming recalculation occurs - for r in results: - if r["households_checked"] > 0: - # At least 10% of households should show variation for - # state-dependent variables - assert ( - r["variation_rate"] > 0.1 or r["households_checked"] < 10 - ), ( - f"No cross-state variation found for {r['variable']}. " - f"This suggests state-specific rules may not be applied " - f"when households are borrowed to different states." - ) - - def test_same_household_different_states_shows_rule_changes( - self, targets_df, X_sparse, household_states, n_households, test_cds - ): - """ - Deep dive test: pick specific households and verify their values - differ across states in a way consistent with state-specific rules. - - For SNAP specifically, different states have different: - - Standard deductions - - Shelter deduction caps - - Vehicle allowances - - Categorical eligibility rules - - This test finds households where we can verify the recalculation - is applying different state rules. 
- """ - state_fips_orig = household_states - cds = test_cds - - # Group CDs by state - cds_by_state = {} - for cd_idx, cd in enumerate(cds): - state = int(cd) // 100 - if state not in cds_by_state: - cds_by_state[state] = [] - cds_by_state[state].append((cd_idx, cd)) - - states = sorted(cds_by_state.keys()) - if len(states) < 2: - pytest.skip("Need at least 2 states") - - # Find national SNAP target (most state-dependent) - snap_national = targets_df[ - (targets_df["variable"] == "snap") - & ( - targets_df["geographic_id"].apply( - lambda x: _get_geo_level(x) == "national" - ) - ) - ] - - if len(snap_national) == 0: - pytest.skip("No national SNAP target found") - - row_idx = snap_national.iloc[0].name - row = X_sparse[row_idx, :].toarray().flatten() - - # Find households with interesting variation patterns - example_households = [] - - for hh_idx in range(n_households): - state_values = {} - for state, cd_list in cds_by_state.items(): - cd_idx, _ = cd_list[0] - col_idx = cd_idx * n_households + hh_idx - state_values[state] = row[col_idx] - - # Look for households where: - # 1. At least 2 states have non-zero SNAP - # 2. 
The values differ significantly (>10% relative difference) - nonzero_states = {s: v for s, v in state_values.items() if v > 0} - - if len(nonzero_states) >= 2: - values = list(nonzero_states.values()) - max_val = max(values) - min_val = min(values) - if min_val > 0 and (max_val - min_val) / min_val > 0.1: - example_households.append( - { - "hh_idx": hh_idx, - "original_state": state_fips_orig[hh_idx], - "state_values": nonzero_states, - "max_val": max_val, - "min_val": min_val, - "variation": (max_val - min_val) / min_val, - } - ) - - if len(example_households) >= 5: - break - - # Assert we found at least one household with variation - assert len(example_households) > 0, ( - "Expected to find households with >10% SNAP variation across " - "states, confirming state-specific rules are applied" - ) diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_period_selection_and_uprating.py b/policyengine_us_data/tests/test_local_area_calibration/test_period_selection_and_uprating.py deleted file mode 100644 index b6523f91b..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_period_selection_and_uprating.py +++ /dev/null @@ -1,246 +0,0 @@ -""" -Tests for best-period selection and uprating in SparseMatrixBuilder. 
-""" - -import unittest -import tempfile -import os -import pandas as pd -from sqlalchemy import create_engine, text - -from policyengine_us_data.datasets.cps.local_area_calibration.sparse_matrix_builder import ( - SparseMatrixBuilder, -) -from policyengine_us_data.db.create_database_tables import ( - TARGET_OVERVIEW_VIEW, -) - - -class TestPeriodSelectionAndUprating(unittest.TestCase): - """Test best-period SQL CTE and uprating logic.""" - - @classmethod - def setUpClass(cls): - cls.temp_db = tempfile.NamedTemporaryFile(suffix=".db", delete=False) - cls.db_path = cls.temp_db.name - cls.temp_db.close() - - cls.db_uri = f"sqlite:///{cls.db_path}" - engine = create_engine(cls.db_uri) - - with engine.connect() as conn: - conn.execute( - text("CREATE TABLE strata (" "stratum_id INTEGER PRIMARY KEY)") - ) - conn.execute( - text( - "CREATE TABLE stratum_constraints (" - "constraint_id INTEGER PRIMARY KEY, " - "stratum_id INTEGER, " - "constraint_variable TEXT, " - "operation TEXT, " - "value TEXT)" - ) - ) - conn.execute( - text( - "CREATE TABLE targets (" - "target_id INTEGER PRIMARY KEY, " - "stratum_id INTEGER, " - "variable TEXT, " - "value REAL, " - "period INTEGER, " - "active INTEGER DEFAULT 1)" - ) - ) - - conn.execute(text(TARGET_OVERVIEW_VIEW)) - conn.commit() - - @classmethod - def tearDownClass(cls): - os.unlink(cls.db_path) - - def setUp(self): - engine = create_engine(self.db_uri) - with engine.connect() as conn: - conn.execute(text("DELETE FROM targets")) - conn.execute(text("DELETE FROM stratum_constraints")) - conn.execute(text("DELETE FROM strata")) - conn.commit() - - def _insert_test_data(self, strata, constraints, targets): - engine = create_engine(self.db_uri) - with engine.connect() as conn: - for stratum_id, group_id in strata: - conn.execute( - text("INSERT INTO strata VALUES (:sid)"), - {"sid": stratum_id}, - ) - for i, (stratum_id, var, op, val) in enumerate(constraints): - conn.execute( - text( - "INSERT INTO stratum_constraints " - "VALUES 
(:cid, :sid, :var, :op, :val)" - ), - { - "cid": i + 1, - "sid": stratum_id, - "var": var, - "op": op, - "val": val, - }, - ) - for i, ( - stratum_id, - variable, - value, - period, - ) in enumerate(targets): - conn.execute( - text( - "INSERT INTO targets " - "(target_id, stratum_id, variable, " - "value, period) " - "VALUES (:tid, :sid, :var, :val, :period)" - ), - { - "tid": i + 1, - "sid": stratum_id, - "var": variable, - "val": value, - "period": period, - }, - ) - conn.commit() - - def _make_builder(self, time_period=2024): - return SparseMatrixBuilder( - db_uri=self.db_uri, - time_period=time_period, - cds_to_calibrate=["601"], - ) - - # ---- Period selection tests ---- - - def test_best_period_prefers_past(self): - """Targets at 2022 and 2026 -> picks 2022 for time_period=2024.""" - self._insert_test_data( - strata=[(1, 1)], - constraints=[ - (1, "congressional_district_geoid", "=", "601"), - ], - targets=[ - (1, "snap", 1000, 2022), - (1, "snap", 2000, 2026), - ], - ) - builder = self._make_builder(time_period=2024) - df = builder._query_targets({"stratum_ids": [1]}) - self.assertEqual(len(df), 1) - self.assertEqual(df.iloc[0]["period"], 2022) - self.assertEqual(df.iloc[0]["value"], 1000) - - def test_best_period_uses_future_when_no_past(self): - """Target only at 2026 -> picks 2026 for time_period=2024.""" - self._insert_test_data( - strata=[(1, 1)], - constraints=[ - (1, "congressional_district_geoid", "=", "601"), - ], - targets=[ - (1, "snap", 5000, 2026), - ], - ) - builder = self._make_builder(time_period=2024) - df = builder._query_targets({"stratum_ids": [1]}) - self.assertEqual(len(df), 1) - self.assertEqual(df.iloc[0]["period"], 2026) - - def test_best_period_exact_match(self): - """Targets at 2022, 2024, 2026 -> picks 2024 exactly.""" - self._insert_test_data( - strata=[(1, 1)], - constraints=[ - (1, "congressional_district_geoid", "=", "601"), - ], - targets=[ - (1, "snap", 1000, 2022), - (1, "snap", 1500, 2024), - (1, "snap", 2000, 2026), - ], 
- ) - builder = self._make_builder(time_period=2024) - df = builder._query_targets({"stratum_ids": [1]}) - self.assertEqual(len(df), 1) - self.assertEqual(df.iloc[0]["period"], 2024) - self.assertEqual(df.iloc[0]["value"], 1500) - - def test_independent_per_stratum_and_variable(self): - """Different strata/variables select independently.""" - self._insert_test_data( - strata=[(1, 1), (2, 1)], - constraints=[ - (1, "congressional_district_geoid", "=", "601"), - (2, "congressional_district_geoid", "=", "601"), - ], - targets=[ - (1, "snap", 1000, 2024), - (1, "snap", 800, 2022), - (2, "person_count", 500, 2022), - (2, "person_count", 600, 2026), - ], - ) - builder = self._make_builder(time_period=2024) - df = builder._query_targets({"stratum_ids": [1, 2]}) - self.assertEqual(len(df), 2) - snap_row = df[df["variable"] == "snap"].iloc[0] - self.assertEqual(snap_row["period"], 2024) - count_row = df[df["variable"] == "person_count"].iloc[0] - self.assertEqual(count_row["period"], 2022) - - # ---- Uprating info tests ---- - - def test_cpi_uprating_for_dollar_vars(self): - builder = self._make_builder(time_period=2024) - factors = {(2022, "cpi"): 1.06, (2022, "pop"): 1.01} - factor, type_ = builder._get_uprating_info("snap", 2022, factors) - self.assertAlmostEqual(factor, 1.06) - self.assertEqual(type_, "cpi") - - def test_pop_uprating_for_count_vars(self): - builder = self._make_builder(time_period=2024) - factors = {(2022, "cpi"): 1.06, (2022, "pop"): 1.01} - factor, type_ = builder._get_uprating_info( - "person_count", 2022, factors - ) - self.assertAlmostEqual(factor, 1.01) - self.assertEqual(type_, "pop") - - def test_no_uprating_for_current_period(self): - builder = self._make_builder(time_period=2024) - factors = {(2024, "cpi"): 1.0, (2024, "pop"): 1.0} - factor, type_ = builder._get_uprating_info("snap", 2024, factors) - self.assertAlmostEqual(factor, 1.0) - self.assertEqual(type_, "none") - - def test_pop_uprating_households_variable(self): - builder = 
self._make_builder(time_period=2024) - factors = {(2022, "cpi"): 1.06, (2022, "pop"): 1.02} - factor, type_ = builder._get_uprating_info("households", 2022, factors) - self.assertAlmostEqual(factor, 1.02) - self.assertEqual(type_, "pop") - - def test_pop_uprating_tax_units_variable(self): - builder = self._make_builder(time_period=2024) - factors = {(2022, "cpi"): 1.06, (2022, "pop"): 1.02} - factor, type_ = builder._get_uprating_info("tax_units", 2022, factors) - self.assertAlmostEqual(factor, 1.02) - self.assertEqual(type_, "pop") - - def test_missing_factor_defaults_to_1(self): - builder = self._make_builder(time_period=2024) - factors = {} - factor, type_ = builder._get_uprating_info("snap", 2020, factors) - self.assertAlmostEqual(factor, 1.0) - self.assertEqual(type_, "cpi") diff --git a/policyengine_us_data/tests/test_local_area_calibration/test_same_state.py b/policyengine_us_data/tests/test_local_area_calibration/test_same_state.py deleted file mode 100644 index 065b99201..000000000 --- a/policyengine_us_data/tests/test_local_area_calibration/test_same_state.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Test same-state values match original simulation values.""" - -import pytest -import numpy as np -from collections import defaultdict - -from .conftest import VARIABLES_TO_TEST, N_VERIFICATION_SAMPLES - - -@pytest.mark.skip( - reason="Sparse matrix builder not used in production; test needs rework after time_period fix" -) -def test_same_state_matches_original( - sim, - X_sparse, - targets_df, - test_cds, - n_households, - household_ids, - household_states, -): - """ - Same-state non-zero cells must match ORIGINAL simulation values. - - When household stays in same state, X_sparse should contain the value - from the original simulation (ground truth from H5 dataset). - - Uses stratified sampling to ensure all variables in VARIABLES_TO_TEST - are covered with approximately equal samples per variable. 
- """ - seed = 42 - rng = np.random.default_rng(seed) - n_hh = n_households - hh_ids = household_ids - hh_states = household_states - - nonzero_rows, nonzero_cols = X_sparse.nonzero() - - # Group same-state cells by variable for stratified sampling - variable_to_indices = defaultdict(list) - variables_to_test = {v[0] for v in VARIABLES_TO_TEST} - - for i in range(len(nonzero_rows)): - row_idx = nonzero_rows[i] - col_idx = nonzero_cols[i] - cd_idx = col_idx // n_hh - hh_idx = col_idx % n_hh - cd = test_cds[cd_idx] - dest_state = int(cd) // 100 - orig_state = int(hh_states[hh_idx]) - - # Only include same-state cells - if dest_state != orig_state: - continue - - variable = targets_df.iloc[row_idx]["variable"] - if variable in variables_to_test: - variable_to_indices[variable].append(i) - - if not variable_to_indices: - pytest.skip("No same-state non-zero cells found for test variables") - - # Stratified sampling: sample proportionally from each variable - samples_per_var = max( - 1, N_VERIFICATION_SAMPLES // len(variable_to_indices) - ) - sample_indices = [] - - for variable, indices in variable_to_indices.items(): - n_to_sample = min(samples_per_var, len(indices)) - sampled = rng.choice(indices, n_to_sample, replace=False) - sample_indices.extend(sampled) - - # Cache original values per variable to avoid repeated calculations - original_values_cache = {} - - def get_original_values(variable): - if variable not in original_values_cache: - original_values_cache[variable] = sim.calculate( - variable, map_to="household" - ).values - return original_values_cache[variable] - - errors = [] - variables_tested = set() - - for idx in sample_indices: - row_idx = nonzero_rows[idx] - col_idx = nonzero_cols[idx] - cd_idx = col_idx // n_hh - hh_idx = col_idx % n_hh - variable = targets_df.iloc[row_idx]["variable"] - actual = float(X_sparse[row_idx, col_idx]) - - # Compare to ORIGINAL simulation values (ground truth) - original_values = get_original_values(variable) - expected = 
float(original_values[hh_idx]) - - variables_tested.add(variable) - - if not np.isclose(actual, expected, atol=0.5): - errors.append( - { - "hh_id": hh_ids[hh_idx], - "hh_idx": hh_idx, - "variable": variable, - "actual": actual, - "expected": expected, - "diff": actual - expected, - "rel_diff": ( - (actual - expected) / expected - if expected != 0 - else np.inf - ), - } - ) - - missing_vars = variables_to_test - variables_tested - if missing_vars: - print(f"Warning: No same-state cells found for: {missing_vars}") - - assert not errors, ( - f"Same-state verification failed: {len(errors)}/{len(sample_indices)} " - f"mismatches across {len(variables_tested)} variables. " - f"First 5: {errors[:5]}" - )