From c8ddf471f5ba46ed86332325469d348081d3947e Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 23 Feb 2026 07:00:56 -0500 Subject: [PATCH 1/2] fix: upload datasets to public HuggingFace repo The push workflow uploads to policyengine/policyengine-uk-data-private but policyengine-uk downloads from policyengine/policyengine-uk-data. This means new columns (like highest_education) never reach downstream consumers. Fix by also uploading to the public repo. Co-Authored-By: Claude Opus 4.6 --- changelog_entry.yaml | 4 ++++ .../storage/upload_completed_datasets.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29bb..41dc06271 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Upload datasets to public HuggingFace repo (policyengine/policyengine-uk-data) in addition to private repo, so policyengine-uk gets the latest data. diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index b110bb6e8..4fe0a0df0 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -1,5 +1,8 @@ from policyengine_uk_data.storage import STORAGE_FOLDER -from policyengine_uk_data.utils.data_upload import upload_data_files +from policyengine_uk_data.utils.data_upload import ( + upload_data_files, + upload_files_to_hf, +) def upload_datasets(): @@ -21,6 +24,12 @@ def upload_datasets(): gcs_bucket_name="policyengine-uk-data-private", ) + # Also upload to the public repo consumed by policyengine-uk + upload_files_to_hf( + files=dataset_files, + hf_repo_name="policyengine/policyengine-uk-data", + ) + if __name__ == "__main__": upload_datasets() From b7f8e3072cee81fef3b9dc270129c1eb58fd9622 Mon Sep 17 00:00:00 2001 From: Max Ghenis Date: Mon, 23 Feb 2026 07:43:20 -0500 Subject: [PATCH 2/2] fix: pass version parameter to public repo upload upload_files_to_hf requires a version parameter for tagging. Without it the upload would fail with TypeError at runtime. Co-Authored-By: Claude Opus 4.6 --- policyengine_uk_data/storage/upload_completed_datasets.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index 4fe0a0df0..144cf6cf0 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -1,3 +1,5 @@ +from importlib import metadata + from policyengine_uk_data.storage import STORAGE_FOLDER from policyengine_uk_data.utils.data_upload import ( upload_data_files, @@ -17,6 +19,8 @@ def upload_datasets(): if not file_path.exists(): raise ValueError(f"File {file_path} does not exist.") + version = metadata.version("policyengine-uk-data") + upload_data_files( files=dataset_files, hf_repo_name="policyengine/policyengine-uk-data-private", @@ -27,6 +31,7 @@ def upload_datasets(): # Also upload to the public repo consumed by policyengine-uk upload_files_to_hf( files=dataset_files, + version=version, hf_repo_name="policyengine/policyengine-uk-data", )