diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..41dc0627 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,4 @@ +- bump: patch + changes: + fixed: + - Upload datasets to public HuggingFace repo (policyengine/policyengine-uk-data) in addition to private repo, so policyengine-uk gets the latest data. diff --git a/policyengine_uk_data/storage/upload_completed_datasets.py b/policyengine_uk_data/storage/upload_completed_datasets.py index b110bb6e..144cf6cf 100644 --- a/policyengine_uk_data/storage/upload_completed_datasets.py +++ b/policyengine_uk_data/storage/upload_completed_datasets.py @@ -1,5 +1,10 @@ +from importlib import metadata + from policyengine_uk_data.storage import STORAGE_FOLDER -from policyengine_uk_data.utils.data_upload import upload_data_files +from policyengine_uk_data.utils.data_upload import ( + upload_data_files, + upload_files_to_hf, +) def upload_datasets(): @@ -14,6 +19,8 @@ def upload_datasets(): if not file_path.exists(): raise ValueError(f"File {file_path} does not exist.") + version = metadata.version("policyengine-uk-data") + upload_data_files( files=dataset_files, hf_repo_name="policyengine/policyengine-uk-data-private", @@ -21,6 +28,13 @@ def upload_datasets(): gcs_bucket_name="policyengine-uk-data-private", ) + # Also upload to the public repo consumed by policyengine-uk + upload_files_to_hf( + files=dataset_files, + version=version, + hf_repo_name="policyengine/policyengine-uk-data", + ) + if __name__ == "__main__": upload_datasets()