From 51bbc14c3ea37a66d8aaae6ba024c2be3af7e1cc Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 15:15:37 -0700 Subject: [PATCH 01/10] Delete an unused import. --- rimport | 1 - 1 file changed, 1 deletion(-) diff --git a/rimport b/rimport index 3539cd3..4516412 100755 --- a/rimport +++ b/rimport @@ -9,7 +9,6 @@ Do `rimport --help` for more information. from __future__ import annotations import argparse -import logging import os import pwd import shutil From 7880d622e9bb6959d55886b1352b47e5c6836305 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 15:47:28 -0700 Subject: [PATCH 02/10] rimport now does relinking step. Resolves ESMCI/inputdataTools#14. --- relink.py | 6 +- rimport | 30 ++++++- .../test_replace_files_with_symlinks.py | 36 +++++++- .../test_replace_one_file_with_symlink.py | 7 +- tests/rimport/test_check_relink_worked.py | 61 +++++++++++++ tests/rimport/test_cmdline.py | 23 +++++ tests/rimport/test_main.py | 78 +++++++++++++++++ tests/rimport/test_stage_data.py | 85 ++++++++++++++++++- 8 files changed, 310 insertions(+), 16 deletions(-) create mode 100644 tests/rimport/test_check_relink_worked.py diff --git a/relink.py b/relink.py index e9f2cc5..0335e50 100644 --- a/relink.py +++ b/relink.py @@ -222,6 +222,7 @@ def replace_files_with_symlinks( for file_path in find_owned_files_scandir( item_to_process, user_uid, inputdata_root ): + logger.info("'%s':", file_path) replace_one_file_with_symlink( inputdata_root, target_dir, file_path, dry_run=dry_run ) @@ -238,7 +239,6 @@ def replace_one_file_with_symlink(inputdata_root, target_dir, file_path, dry_run file_path (str): The path of the file to be replaced. dry_run (bool): If True, only show what would be done without making changes. """ - logger.info("'%s':", file_path) # Determine the relative path and the new link's destination relative_path = os.path.relpath(file_path, inputdata_root) @@ -270,7 +270,7 @@ def replace_one_file_with_symlink(inputdata_root, target_dir, file_path, dry_run os.rename(link_name, link_name + ".tmp") logger.info("%sDeleted original file: %s", INDENT, link_name) except OSError as e: - logger.error("%sError deleting file %s: %s. Skipping.", INDENT, link_name, e) + logger.error("%sError deleting file %s: %s. Skipping relink.", INDENT, link_name, e) return # Create the symbolic link, handling necessary parent directories @@ -283,7 +283,7 @@ def replace_one_file_with_symlink(inputdata_root, target_dir, file_path, dry_run except OSError as e: os.rename(link_name + ".tmp", link_name) logger.error( - "%sError creating symlink for %s: %s. Skipping.", INDENT, link_name, e + "%sError creating symlink for %s: %s. Skipping relink.", INDENT, link_name, e ) diff --git a/rimport b/rimport index 4516412..82d422f 100755 --- a/rimport +++ b/rimport @@ -18,6 +18,8 @@ from typing import Iterable, List from urllib.request import Request, urlopen from urllib.error import HTTPError +from relink import replace_one_file_with_symlink + import shared INDENT = shared.INDENT @@ -143,10 +145,24 @@ def normalize_paths(root: Path, relnames: Iterable[str]) -> List[Path]: return paths +def check_relink_worked(src: Path, dst: Path) -> None: + """Check whether relink worked + + Args: + src (Path): Source file (should have been converted to symlink) + dst (Path): Destination file (symlink target) + + Raises: + RuntimeError: If src is not a symlink pointing to dst. + """ + if not (src.is_symlink() and src.resolve() == dst): + raise RuntimeError("Error relinking during rimport") + + def stage_data( src: Path, inputdata_root: Path, staging_root: Path, check: bool = False ) -> None: - """Stage a file by mirroring its path under `staging_root`. + """Stage a file by mirroring its path under `staging_root`, then replace with symlink to staged. Destination path is computed by replacing the `inputdata_root` prefix of `src` with `staging_root`, i.e.: @@ -162,6 +178,7 @@ def stage_data( RuntimeError: If `src` is a live symlink pointing outside staging, or if `src` is outside the inputdata root, or if `src` is already under staging directory. RuntimeError: If `src` is a broken symlink. + RuntimeError: If it failed to replace `src` with a symlink to the staged file. FileNotFoundError: If `src` does not exist. Guardrails: @@ -199,20 +216,25 @@ def stage_data( dst = staging_root / rel if dst.exists(): - logger.info("%sFile is already published but NOT linked; do", INDENT) - logger.info("%srelink.py %s", 2 * INDENT, rel) - logger.info("%sto resolve.", INDENT) + logger.info("File is already published but NOT linked; linking now.") + replace_one_file_with_symlink(inputdata_root, staging_root, str(src)) print_can_file_be_downloaded(can_file_be_downloaded(rel, staging_root)) + check_relink_worked(src, dst) return if check: logger.info("%sFile is not already published", INDENT) return + # Copy file to destination dst.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src, dst) logger.info("%s[rimport] staged %s -> %s", INDENT, src, dst) + # Replace original with symlink to destination + replace_one_file_with_symlink(inputdata_root, staging_root, str(src)) + check_relink_worked(src, dst) + def ensure_running_as(target_user: str, argv: list[str]) -> None: """Ensure the script is running as the target user, re-executing via sudo if needed. diff --git a/tests/relink/test_replace_files_with_symlinks.py b/tests/relink/test_replace_files_with_symlinks.py index 6fa2820..3742b43 100644 --- a/tests/relink/test_replace_files_with_symlinks.py +++ b/tests/relink/test_replace_files_with_symlinks.py @@ -12,6 +12,8 @@ from unittest.mock import patch, call import pytest +import shared + # Add parent directory to path to import relink module sys.path.insert( 0, os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -20,6 +22,15 @@ import relink # noqa: E402 +@pytest.fixture(autouse=True) +def configure_logging_for_tests(): + """Configure logging for all tests in this module.""" + shared.configure_logging(logging.INFO) + yield + # Cleanup + relink.logger.handlers.clear() + + @pytest.fixture(name="mock_replace_one") def fixture_mock_replace_one(): """Fixture that mocks relink.replace_one_file_with_symlink""" @@ -27,7 +38,7 @@ def fixture_mock_replace_one(): yield mock -def test_basic_file_replacement_given_dir(temp_dirs, current_user, mock_replace_one): +def test_basic_file_replacement_given_dir(temp_dirs, current_user, mock_replace_one, caplog): """Test basic functionality: given directory, replace owned file with symlink.""" inputdata_root, target_dir = temp_dirs username = current_user @@ -55,8 +66,11 @@ def test_basic_file_replacement_given_dir(temp_dirs, current_user, mock_replace_ dry_run=False, ) + # Verify message with filename was printed + assert f"'{source_file}':" in caplog.text -def test_basic_file_replacement_given_file(temp_dirs, current_user, mock_replace_one): + +def test_basic_file_replacement_given_file(temp_dirs, current_user, mock_replace_one, caplog): """Test basic functionality: given owned file, replace with symlink.""" inputdata_root, target_dir = temp_dirs username = current_user @@ -84,8 +98,11 @@ def test_basic_file_replacement_given_file(temp_dirs, current_user, mock_replace dry_run=False, ) + # Verify message with filename was printed + assert f"'{source_file}':" in caplog.text + -def test_dry_run(temp_dirs, current_user, mock_replace_one): +def test_dry_run(temp_dirs, current_user, mock_replace_one, caplog): """Test that dry_run=True is passed correctly.""" inputdata_root, target_dir = temp_dirs username = current_user @@ -117,6 +134,9 @@ def test_dry_run(temp_dirs, current_user, mock_replace_one): dry_run=True, ) + # Verify message with filename was printed + assert f"'{source_file}':" in caplog.text + def test_nested_directory_structure(temp_dirs, current_user, mock_replace_one): """Test with nested directory structures.""" @@ -175,6 +195,9 @@ def test_skip_existing_symlinks(temp_dirs, current_user, caplog, mock_replace_on # Verify replace_one_file_with_symlink() wasn't called mock_replace_one.assert_not_called() + # Verify message with filename was NOT printed + assert f"'{source_link}':" not in caplog.text + def test_missing_target_file(temp_dirs, current_user, caplog, mock_replace_one): """Test behavior when target file doesn't exist.""" @@ -224,7 +247,7 @@ def test_invalid_username(temp_dirs, caplog, mock_replace_one): mock_replace_one.assert_not_called() -def test_multiple_files(temp_dirs, current_user, mock_replace_one): +def test_multiple_files(temp_dirs, current_user, mock_replace_one, caplog): """Test with multiple files in the directory.""" inputdata_root, target_dir = temp_dirs username = current_user @@ -251,6 +274,11 @@ def test_multiple_files(temp_dirs, current_user, mock_replace_one): calls.append(call(inputdata_root, target_dir, source_file, dry_run=False)) mock_replace_one.assert_has_calls(calls, any_order=True) + # Verify message with filename was printed + for i in range(5): + source_file = os.path.join(inputdata_root, f"file_{i}.txt") + assert f"'{source_file}':" in caplog.text + def test_multiple_files_nested(temp_dirs, current_user, mock_replace_one): """Test with multiple files scattered throughout a nested directory tree.""" diff --git a/tests/relink/test_replace_one_file_with_symlink.py b/tests/relink/test_replace_one_file_with_symlink.py index 741f9f8..2a84cd8 100644 --- a/tests/relink/test_replace_one_file_with_symlink.py +++ b/tests/relink/test_replace_one_file_with_symlink.py @@ -118,7 +118,7 @@ def test_absolute_paths(temp_dirs): os.chdir(cwd) -def test_print_found_owned_file(temp_dirs, caplog): +def test_no_print_found_owned_file(temp_dirs, caplog): """Test that message with filename is printed.""" source_dir, target_dir = temp_dirs @@ -135,9 +135,8 @@ def test_print_found_owned_file(temp_dirs, caplog): with caplog.at_level(logging.INFO): relink.replace_one_file_with_symlink(source_dir, target_dir, source_file) - # Check that message was logged - assert f"'{source_file}':" in caplog.text - assert source_file in caplog.text + # Check that message was NOT logged (should happen in replace_files_with_symlinks instead) + assert f"'{source_file}':" not in caplog.text def test_print_deleted_and_created_messages(temp_dirs, caplog): diff --git a/tests/rimport/test_check_relink_worked.py b/tests/rimport/test_check_relink_worked.py new file mode 100644 index 0000000..7e01f5b --- /dev/null +++ b/tests/rimport/test_check_relink_worked.py @@ -0,0 +1,61 @@ +""" +Tests for check_relink_worked() function in rimport script. +""" + +import os +import importlib.util +from importlib.machinery import SourceFileLoader + +import pytest + + +# Import rimport module from file without .py extension +rimport_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "rimport", +) +loader = SourceFileLoader("rimport", rimport_path) +spec = importlib.util.spec_from_loader("rimport", loader) +if spec is None: + raise ImportError(f"Could not create spec for rimport from {rimport_path}") +rimport = importlib.util.module_from_spec(spec) +# Don't add to sys.modules to avoid conflict with other test files +loader.exec_module(rimport) + +def test_ok(tmp_path): + """Check that it doesn't error if src is a symlink pointing to dst""" + # Set up + dst = tmp_path / "dst.nc" + src = tmp_path / "src.nc" + src.symlink_to(dst) + + # Shouldn't error + rimport.check_relink_worked(src, dst) + +def test_error_not_symlink(tmp_path): + """Check that it does error if src isn't a symlink""" + # Set up + dst = tmp_path / "dst.nc" + src = tmp_path / "src.nc" + + # Should error + with pytest.raises(RuntimeError) as exc_info: + rimport.check_relink_worked(src, dst) + + # Verify error message was printed + assert "Error relinking during rimport" in str(exc_info.value) + +def test_error_symlink_but_not_to_dst(tmp_path): + """Check that it does error if src is a symlink but not pointing to dst""" + # Set up + dst = tmp_path / "dst.nc" + other = tmp_path / "other.nc" + src = tmp_path / "src.nc" + src.symlink_to(other) + + # Should error + with pytest.raises(RuntimeError) as exc_info: + rimport.check_relink_worked(src, dst) + + # Verify error message was printed + assert "Error relinking during rimport" in str(exc_info.value) \ No newline at end of file diff --git a/tests/rimport/test_cmdline.py b/tests/rimport/test_cmdline.py index d209422..62edd7e 100644 --- a/tests/rimport/test_cmdline.py +++ b/tests/rimport/test_cmdline.py @@ -82,6 +82,10 @@ def test_file_option_stages_single_file( assert staged_file.exists() assert staged_file.read_text() == "test data" + # Verify file was relinked + assert test_file.is_symlink() + assert test_file.resolve() == staged_file + def test_list_option_stages_multiple_files( self, rimport_script, test_env, rimport_env ): @@ -127,6 +131,12 @@ def test_list_option_stages_multiple_files( assert (staging_root / "file1.nc").read_text() == "data1" assert (staging_root / "file2.nc").read_text() == "data2" + # Verify both files were relinked + assert file1.is_symlink() + assert file1.resolve() == (staging_root / "file1.nc") + assert file2.is_symlink() + assert file2.resolve() == (staging_root / "file2.nc") + def test_preserves_directory_structure(self, rimport_script, test_env, rimport_env): """Test that directory structure is preserved in staging.""" inputdata_root = test_env["inputdata_root"] @@ -163,6 +173,10 @@ def test_preserves_directory_structure(self, rimport_script, test_env, rimport_e assert staged_file.exists() assert staged_file.read_text() == "nested data" + # Verify file was relinked + assert nested_file.is_symlink() + assert nested_file.resolve() == staged_file + def test_error_for_nonexistent_file(self, rimport_script, test_env, rimport_env): """Test that error is reported for nonexistent file.""" inputdata_root = test_env["inputdata_root"] @@ -308,6 +322,12 @@ def test_list_with_comments_and_blanks(self, rimport_script, test_env, rimport_e assert (staging_root / "file1.nc").exists() assert (staging_root / "file2.nc").exists() + # Verify both files were relinked + assert file1.is_symlink() + assert file1.resolve() == (staging_root / "file1.nc") + assert file2.is_symlink() + assert file2.resolve() == (staging_root / "file2.nc") + def test_prints_and_exits_for_already_published_linked_file( self, rimport_script, test_env, rimport_env ): @@ -473,5 +493,8 @@ def test_check_doesnt_copy(self, rimport_script, test_env, rimport_env): staged_file = staging_root / "test.nc" assert not staged_file.exists() + # Verify file was not replaced with a symlink + assert not test_file.is_symlink() + # Verify message was printed assert "not already published" in result.stdout diff --git a/tests/rimport/test_main.py b/tests/rimport/test_main.py index b35ae2d..76575c5 100644 --- a/tests/rimport/test_main.py +++ b/tests/rimport/test_main.py @@ -367,3 +367,81 @@ def stage_data_side_effect(src, *_args, **_kwargs): captured = capsys.readouterr() # Should have 2 error messages assert captured.err.count("error processing") == 2 + + @patch.object(rimport, "replace_one_file_with_symlink") + @patch.object(rimport, "get_staging_root") + @patch.object(rimport, "normalize_paths") + @patch.object(rimport, "ensure_running_as") + def test_error_if_file_already_published_but_relink_fails( + self, + _mock_ensure_running_as, + mock_normalize_paths, + mock_get_staging_root, + _mock_replace_one_file_with_symlink, + tmp_path, + capsys, + ): + """ + Test that main() returns error code 1 if attempting to relink an already-published file + fails. + """ + inputdata_root = tmp_path / "inputdata" + inputdata_root.mkdir() + staging_root = tmp_path / "staging" + staging_root.mkdir() + + filename = "test.nc" + src = inputdata_root / filename + src.write_text("some data") + assert src.exists() + dst = staging_root / filename + dst.write_text("some data") + + mock_get_staging_root.return_value = staging_root + test_file = inputdata_root / filename + mock_normalize_paths.return_value = [test_file] + + result = rimport.main(["-inputdata", str(inputdata_root), str(src)]) + + assert result == 1 + captured = capsys.readouterr() + assert "rimport: error processing" in captured.err + assert "Error relinking during rimport" in captured.err + + @patch.object(rimport, "replace_one_file_with_symlink") + @patch.object(rimport, "get_staging_root") + @patch.object(rimport, "normalize_paths") + @patch.object(rimport, "ensure_running_as") + def test_error_if_file_newly_published_but_relink_fails( + self, + _mock_ensure_running_as, + mock_normalize_paths, + mock_get_staging_root, + _mock_replace_one_file_with_symlink, + tmp_path, + capsys, + ): + """ + Test that main() returns error code 1 if attempting to relink a newly-published file + fails. + """ + inputdata_root = tmp_path / "inputdata" + inputdata_root.mkdir() + staging_root = tmp_path / "staging" + staging_root.mkdir() + + filename = "test.nc" + src = inputdata_root / filename + src.write_text("some data") + assert src.exists() + + mock_get_staging_root.return_value = staging_root + test_file = inputdata_root / filename + mock_normalize_paths.return_value = [test_file] + + result = rimport.main(["-inputdata", str(inputdata_root), str(src)]) + + assert result == 1 + captured = capsys.readouterr() + assert "rimport: error processing" in captured.err + assert "Error relinking during rimport" in captured.err diff --git a/tests/rimport/test_stage_data.py b/tests/rimport/test_stage_data.py index 85c3bb1..fe3a957 100644 --- a/tests/rimport/test_stage_data.py +++ b/tests/rimport/test_stage_data.py @@ -68,6 +68,10 @@ def test_copies_file_to_staging(self, inputdata_root, staging_root): assert dst.exists() assert dst.read_text() == "data content" + # Verify original was replaced with symlink + assert src.is_symlink() + assert src.resolve() == dst + def test_check_doesnt_copy(self, inputdata_root, staging_root, caplog): """Test that a file is NOT copied to the staging directory if check is True""" # Create file in inputdata root @@ -80,6 +84,7 @@ def test_check_doesnt_copy(self, inputdata_root, staging_root, caplog): # Verify file was NOT copied to staging dst = staging_root / "file.nc" assert not dst.exists() + assert not src.is_symlink() # Verify message was logged assert "not already published" in caplog.text @@ -99,6 +104,10 @@ def test_preserves_directory_structure(self, inputdata_root, staging_root): assert dst.exists() assert dst.read_text() == "nested data" + # Verify original was replaced with symlink + assert src.is_symlink() + assert src.resolve() == dst + def test_prints_live_symlink_already_published_not_downloadable( self, inputdata_root, staging_root, caplog ): @@ -195,11 +204,15 @@ def test_prints_published_but_not_linked( # Verify the right messages were logged or not msg = "File is already published and linked" assert msg not in caplog.text - msg = "File is already published but NOT linked; do" + msg = "File is already published but NOT linked; linking" assert msg in caplog.text msg = "File is available for download" assert msg in caplog.text + # Verify the file got linked + assert inputdata.is_symlink() + assert inputdata.resolve() == staged + def test_raises_error_for_live_symlink_pointing_somewhere_other_than_staging( self, tmp_path, inputdata_root, staging_root ): @@ -301,6 +314,10 @@ def test_preserves_file_metadata(self, inputdata_root, staging_root): assert dst_stat.st_mtime == src_stat.st_mtime assert dst_stat.st_mode == src_stat.st_mode + # Verify the file got linked + assert src.is_symlink() + assert src.resolve() == dst + def test_handles_files_with_spaces(self, inputdata_root, staging_root): """Test handling files with spaces in names.""" # Create file with spaces in inputdata root @@ -315,6 +332,10 @@ def test_handles_files_with_spaces(self, inputdata_root, staging_root): assert dst.exists() assert dst.read_text() == "data" + # Verify the file got linked + assert src.is_symlink() + assert src.resolve() == dst + def test_handles_files_with_special_characters(self, inputdata_root, staging_root): """Test handling files with special characters.""" # Create file with special chars in inputdata root @@ -328,3 +349,65 @@ def test_handles_files_with_special_characters(self, inputdata_root, staging_roo dst = staging_root / "file-name_123@test.nc" assert dst.exists() assert dst.read_text() == "data" + + # Verify the file got linked + assert src.is_symlink() + assert src.resolve() == dst + + @patch.object(rimport, "replace_one_file_with_symlink") + def test_not_already_published_errors_if_relink_fails( + self, _mock_replace_one_file_with_symlink, inputdata_root, staging_root + ): + """ + Test that it errors if, after publishing, replace_one_file_with_symlink() doesn't make the + symlink. + """ + # Create file in inputdata root + src = inputdata_root / "file.nc" + src.write_text("data content") + + assert not inputdata_root.is_symlink() + assert not src.is_symlink() + dst = staging_root / "file.nc" + assert not dst.exists() + + # Stage the file but expect failure during relink check + with pytest.raises(RuntimeError) as exc_info: + rimport.stage_data(src, inputdata_root, staging_root) + + # Verify file was copied to staging + assert dst.exists() + assert dst.read_text() == "data content" + + # Verify error message was printed + assert "Error relinking during rimport" in str(exc_info.value) + + @patch.object(rimport, "can_file_be_downloaded") + @patch.object(rimport, "replace_one_file_with_symlink") + def test_already_published_errors_if_relink_fails( + self, + _mock_replace_one_file_with_symlink, + mock_can_file_be_downloaded, + inputdata_root, + staging_root, + ): + """ + Test that it errors if, for an already-published file, replace_one_file_with_symlink() + doesn't make the symlink. + """ + # Create a real file in staging AND in inputdata + filename = "real_file.nc" + staged = staging_root / filename + staged.write_text("data") + inputdata = inputdata_root / filename + inputdata.write_text("data") + + # Mock can_file_be_downloaded to return True + mock_can_file_be_downloaded.return_value = True + + # Expect failure during relink check + with pytest.raises(RuntimeError) as exc_info: + rimport.stage_data(inputdata, inputdata_root, staging_root) + + # Verify error message was printed + assert "Error relinking during rimport" in str(exc_info.value) From 07aedeb3bc8c2e55ee2646a022c4183b1236b314 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 17:09:02 -0700 Subject: [PATCH 03/10] rimport: Print message about not needing to run relink.py. --- rimport | 6 +++++- tests/rimport/test_main.py | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/rimport b/rimport index 82d422f..d940a6d 100755 --- a/rimport +++ b/rimport @@ -431,7 +431,11 @@ def main(argv: List[str] | None = None) -> int: errors += 1 logger.error("%srimport: error processing %s: %s", INDENT, p, e) - return 0 if errors == 0 else 1 + if errors: + return 1 + if not args.check: + logger.info("\nNo need to run relink.py") + return 0 if __name__ == "__main__": diff --git a/tests/rimport/test_main.py b/tests/rimport/test_main.py index 76575c5..105ab28 100644 --- a/tests/rimport/test_main.py +++ b/tests/rimport/test_main.py @@ -41,6 +41,7 @@ def test_single_file_success( mock_get_staging_root, mock_stage_data, tmp_path, + caplog, ): """Test main() logic flow when a single file stages successfully.""" # Setup @@ -62,6 +63,7 @@ def test_single_file_success( mock_stage_data.assert_called_once_with( test_file, inputdata_root, staging_root, False ) + assert "No need to run relink.py" in caplog.text @patch.object(rimport, "stage_data") @patch.object(rimport, "get_staging_root") @@ -153,6 +155,10 @@ def stage_data_side_effect(src, *_args, **_kwargs): assert "error processing" in captured.err assert "Test error for file2" in captured.err + # Check that message about not needing to run relink was NOT printed + assert "No need to run relink.py" not in captured.err + assert "No need to run relink.py" not in captured.out + @patch.object(rimport, "ensure_running_as") def test_nonexistent_inputdata_directory( self, _mock_ensure_running_as, tmp_path, capsys @@ -226,6 +232,7 @@ def test_check_mode_calls( mock_get_staging_root, mock_stage_data, tmp_path, + caplog, ): """Test that --check mode skips the user check but does call stage_data.""" inputdata_root = tmp_path / "inputdata" @@ -248,6 +255,8 @@ def test_check_mode_calls( mock_stage_data.assert_called_once_with( test_file, inputdata_root, staging_root, True ) + # Message about relink.py should not have been printed + assert "No need to run relink.py" not in caplog.text @patch.object(rimport, "stage_data") @patch.object(rimport, "get_staging_root") From 2ddfa707044ee6741947806422be3624f9c1ffa6 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 17:09:32 -0700 Subject: [PATCH 04/10] rimport main() tests: Avoid magic booleans. --- tests/rimport/test_main.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/rimport/test_main.py b/tests/rimport/test_main.py index 105ab28..3451199 100644 --- a/tests/rimport/test_main.py +++ b/tests/rimport/test_main.py @@ -60,8 +60,9 @@ def test_single_file_success( # Verify assert result == 0 mock_normalize_paths.assert_called_once_with(inputdata_root, ["test.nc"]) + check = False mock_stage_data.assert_called_once_with( - test_file, inputdata_root, staging_root, False + test_file, inputdata_root, staging_root, check ) assert "No need to run relink.py" in caplog.text @@ -106,10 +107,11 @@ def test_file_list_success( inputdata_root, ["file1.nc", "file2.nc"] ) assert mock_stage_data.call_count == 2 + check = False mock_stage_data.assert_has_calls( [ - call(file1, inputdata_root, staging_root, False), - call(file2, inputdata_root, staging_root, False), + call(file1, inputdata_root, staging_root, check), + call(file2, inputdata_root, staging_root, check), ] ) @@ -252,8 +254,9 @@ def test_check_mode_calls( # ensure_running_as should NOT be called in check mode mock_ensure_running_as.assert_not_called() # stage_data should be called with check=True + check = True mock_stage_data.assert_called_once_with( - test_file, inputdata_root, staging_root, True + test_file, inputdata_root, staging_root, check ) # Message about relink.py should not have been printed assert "No need to run relink.py" not in caplog.text From e917dfbe72cb0cccafb38fb0694f7fe9dff4c158 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 17:14:06 -0700 Subject: [PATCH 05/10] Reformat with black. --- relink.py | 9 +++++++-- tests/relink/test_replace_files_with_symlinks.py | 8 ++++++-- tests/rimport/test_check_relink_worked.py | 5 ++++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/relink.py b/relink.py index 0335e50..1863433 100644 --- a/relink.py +++ b/relink.py @@ -270,7 +270,9 @@ def replace_one_file_with_symlink(inputdata_root, target_dir, file_path, dry_run os.rename(link_name, link_name + ".tmp") logger.info("%sDeleted original file: %s", INDENT, link_name) except OSError as e: - logger.error("%sError deleting file %s: %s. Skipping relink.", INDENT, link_name, e) + logger.error( + "%sError deleting file %s: %s. Skipping relink.", INDENT, link_name, e + ) return # Create the symbolic link, handling necessary parent directories @@ -283,7 +285,10 @@ def replace_one_file_with_symlink(inputdata_root, target_dir, file_path, dry_run except OSError as e: os.rename(link_name + ".tmp", link_name) logger.error( - "%sError creating symlink for %s: %s. Skipping relink.", INDENT, link_name, e + "%sError creating symlink for %s: %s. Skipping relink.", + INDENT, + link_name, + e, ) diff --git a/tests/relink/test_replace_files_with_symlinks.py b/tests/relink/test_replace_files_with_symlinks.py index 3742b43..73b0244 100644 --- a/tests/relink/test_replace_files_with_symlinks.py +++ b/tests/relink/test_replace_files_with_symlinks.py @@ -38,7 +38,9 @@ def fixture_mock_replace_one(): yield mock -def test_basic_file_replacement_given_dir(temp_dirs, current_user, mock_replace_one, caplog): +def test_basic_file_replacement_given_dir( + temp_dirs, current_user, mock_replace_one, caplog +): """Test basic functionality: given directory, replace owned file with symlink.""" inputdata_root, target_dir = temp_dirs username = current_user @@ -70,7 +72,9 @@ def test_basic_file_replacement_given_dir(temp_dirs, current_user, mock_replace_ assert f"'{source_file}':" in caplog.text -def test_basic_file_replacement_given_file(temp_dirs, current_user, mock_replace_one, caplog): +def test_basic_file_replacement_given_file( + temp_dirs, current_user, mock_replace_one, caplog +): """Test basic functionality: given owned file, replace with symlink.""" inputdata_root, target_dir = temp_dirs username = current_user diff --git a/tests/rimport/test_check_relink_worked.py b/tests/rimport/test_check_relink_worked.py index 7e01f5b..3b28068 100644 --- a/tests/rimport/test_check_relink_worked.py +++ b/tests/rimport/test_check_relink_worked.py @@ -22,6 +22,7 @@ # Don't add to sys.modules to avoid conflict with other test files loader.exec_module(rimport) + def test_ok(tmp_path): """Check that it doesn't error if src is a symlink pointing to dst""" # Set up @@ -32,6 +33,7 @@ def test_ok(tmp_path): # Shouldn't error rimport.check_relink_worked(src, dst) + def test_error_not_symlink(tmp_path): """Check that it does error if src isn't a symlink""" # Set up @@ -45,6 +47,7 @@ def test_error_not_symlink(tmp_path): # Verify error message was printed assert "Error relinking during rimport" in str(exc_info.value) + def test_error_symlink_but_not_to_dst(tmp_path): """Check that it does error if src is a symlink but not pointing to dst""" # Set up @@ -58,4 +61,4 @@ def test_error_symlink_but_not_to_dst(tmp_path): rimport.check_relink_worked(src, dst) # Verify error message was printed - assert "Error relinking during rimport" in str(exc_info.value) \ No newline at end of file + assert "Error relinking during rimport" in str(exc_info.value) From 9ae09b90e3a6631fa010b642ff37d7f76b29f804 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 17:14:27 -0700 Subject: [PATCH 06/10] Add previous commit to .git-blame-ignore-revs. --- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 2ff4992..6e0f1f1 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,4 +2,5 @@ 235927ce73125df31ec3a0049b067afa1f0a135b 25eefd52d023f97870d8b4a27988f8fc91c3ed73 7d8c46cce63ff1b93836b72cdad74ec796b09ced +e917dfbe72cb0cccafb38fb0694f7fe9dff4c158 From 62d6b6155f2b26b6e2f6769b0a11064c5fec0a99 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 17:18:17 -0700 Subject: [PATCH 07/10] rimport: Update help text. --- rimport | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rimport b/rimport index d940a6d..1080705 100755 --- a/rimport +++ b/rimport @@ -48,7 +48,7 @@ def build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( description=( f"Copy files from CESM inputdata directory ({DEFAULT_INPUTDATA_ROOT}) to a publishing" - " directory." + " directory, then replace the original with a symlink to the copy." ), add_help=False, # Disable automatic help to add custom -help flag ) From a5122679f7704d5aea71509a19e28911aa65a4ea Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Fri, 6 Feb 2026 17:18:56 -0700 Subject: [PATCH 08/10] rimport: Update docstrings. --- rimport | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/rimport b/rimport index 1080705..82c52c3 100755 --- a/rimport +++ b/rimport @@ -2,7 +2,8 @@ # TODO: Move all the Python into new file rimport.py for simpler testing. Keep rimport as a # convenience wrapper. """ -Copy files from CESM inputdata directory to a publishing directory. +Copy files from CESM inputdata directory to a publishing directory, then replace the original with a +symlink to the copy. Do `rimport --help` for more information. """ @@ -35,13 +36,6 @@ logger = shared.logger def build_parser() -> argparse.ArgumentParser: """Build and configure the argument parser for rimport. - Creates an ArgumentParser with the following options: - - Mutually exclusive required group: - --file: Import a single file (relative to inputdata directory) - --list: Import multiple files from a list file - - Optional: - --inputdata: Override the default inputdata directory - Returns: argparse.ArgumentParser: Configured parser ready to parse command-line arguments. """ @@ -375,7 +369,7 @@ def get_files_to_process(file: str, filelist: str, items_to_process: list): def main(argv: List[str] | None = None) -> int: """Main entry point for the rimport tool. - Copies files from the CESM inputdata directory to a staging/publishing directory, + Copies and relinks files from the CESM inputdata directory to a staging/publishing directory, preserving the directory structure. Ensures the script runs as the correct user (STAGE_OWNER) and handles both single files and file lists. @@ -391,7 +385,7 @@ def main(argv: List[str] | None = None) -> int: Exit Codes: 0: All files staged successfully. - 1: One or more files failed to stage (errors printed to stderr). + 1: One or more files failed to stage or relink (errors printed to stderr). 2: Fatal error (missing inputdata directory, missing file list, etc.). """ parser = build_parser() From 8914291ff3d4dfdf1a8dec277251bddcda0a0921 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sat, 7 Feb 2026 12:40:10 -0700 Subject: [PATCH 09/10] Update readme. --- README.md | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index fa65886..41a4cd7 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,28 @@ # inputdataTools -rimport used for publishing inputdata -relink.py used to remove files from $CESMDATAROOT and replace with links. +Tools used for publishing CESM input data. -Process to publish data: -The first step is to place your datafile(s) in /glade/campaign/cesm/cesmdata/inputdata following the inputdata naming convention. -When you have tested on derecho and are ready to share the new file(s) publically: +## Process to publish data +1. Place your datafile(s) in `/glade/campaign/cesm/cesmdata/inputdata/` (`$CESMDATAROOT`) following the input data naming conventions (see below). +2. When you have tested on derecho and are ready to share the new file(s) publically, run the `rimport` script. This will ask you for a password and 2FA login before it can copy the files from this "input data" directory" to the "publication" or "staging" directory. (This authentication should be possible for any member of the `cseg` group.) +3. Once that's done, `rimport` will replace the original with a link to the copy. +4. Sometime in the next 24 hours, your file should be uploaded to the GDEX server and available for download during CESM runs. -As user cesmdata run the rimport script. This requires a 2FA login, everyone in cseg should have access to the account, you will need to -contact cislhelp and request access if you are new to the group. +The `relink.py` script was previously used for step 3 above, but that functionality is now built into `rimport`. It's still there if you want to use it by itself. -As owner of the files in /glade/campaign/cesm/cesmdata/inputdata run script relink.py, this will remove the files from /glade/campaign/cesm/cesmdata/inputdata -and replace them with links to the published data location. /glade/campaign/collections/gdex/data/d651077/cesmdata/inputdata/ - -Filenames and metadata: +## Filenames and metadata: There is a good description of metadata that should be included in inputdata files here: https://www.cesm.ucar.edu/models/cam/metadata Filenames should be descriptive and should contain the date the file was created. Other information in the filename is also useful to keep as shown in the list below. Files published in inputdata should never be overwritten. + Replacement files should be different at least by creation date. Files that come from CESM simualtions should normally follow the output naming conventions from https://www.cesm.ucar.edu/models/cesm2/naming-conventions#modelOutputFilenames -Files should be placed under the appropriate directory for the component it's used or applicable for (so under lnd/clm2 for data that applies to the CLM/CTSM land model). Subdirectories under those levels should be used to seperate data by general types as needed for that component. +Files should be placed under the appropriate directory for the component it's used or applicable for (so under `lnd/clm2/` for data that applies to the CLM/CTSM land model). Subdirectories under those levels should be used to seperate data by general types as needed for that component. Some suggestions on things to include in the filename: - Spatial resolution of the gridded data - Year (or years) for which the data was observed or applicable to - Institution or project source of the data -- Creation date in the form of _cMMDDYY.nc +- Creation date in the form of `_cMMDDYY.nc` - CESM casename that was used to create the data (also simulation date for it) (see output file naming conventions above) -- Things needed to distinquish it from other similar files in inputdata (i.e. things like number of vertical levels, land-mask, number of Plant Functional Types, etc.) +- Things needed to distinquish it from other similar input files (e.g., number of vertical levels, land mask, number of Plant Functional Types, etc.) From 201090930f44d19d99b8ea444d79a04e41fe7aa3 Mon Sep 17 00:00:00 2001 From: Sam Rabin Date: Sat, 7 Feb 2026 12:43:34 -0700 Subject: [PATCH 10/10] Another readme update. --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 41a4cd7..6f4e1ab 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,9 @@ Tools used for publishing CESM input data. 3. Once that's done, `rimport` will replace the original with a link to the copy. 4. Sometime in the next 24 hours, your file should be uploaded to the GDEX server and available for download during CESM runs. -The `relink.py` script was previously used for step 3 above, but that functionality is now built into `rimport`. It's still there if you want to use it by itself. +Notes: +- Use `rimport --check` if you'd like to see the current status of a file, including whether it's available for download. +- The `relink.py` script was previously used for step 3 above, but that functionality is now built into `rimport`. It's still there if you want to use it by itself. ## Filenames and metadata: