diff --git a/.gitignore b/.gitignore index bee8a64..d8a1bb0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ __pycache__ +.coverage diff --git a/rimport b/rimport index 55e8510..9c09239 100755 --- a/rimport +++ b/rimport @@ -15,12 +15,16 @@ import shutil import sys from pathlib import Path from typing import Iterable, List +from urllib.request import Request, urlopen +from urllib.error import HTTPError import shared DEFAULT_INPUTDATA_ROOT = Path(shared.DEFAULT_INPUTDATA_ROOT) DEFAULT_STAGING_ROOT = Path(shared.DEFAULT_STAGING_ROOT) STAGE_OWNER = "cesmdata" +INDENT = " " +INPUTDATA_URL = "https://osdf-data.gdex.ucar.edu/ncar/gdex/d651077/cesmdata/inputdata" def build_parser() -> argparse.ArgumentParser: @@ -73,6 +77,14 @@ def build_parser() -> argparse.ArgumentParser: ), ) + parser.add_argument( + "--check", + "-check", + "-c", + action="store_true", + help="Check whether file(s) is/are already published.", + ) + # Provide -help to mirror legacy behavior (in addition to -h and --help) parser.add_argument( "-h", @@ -108,33 +120,33 @@ def read_filelist(list_path: Path) -> List[str]: return lines -def resolve_paths(root: Path, relnames: Iterable[str]) -> List[Path]: - """Convert relative or absolute path names to resolved absolute Paths. +def normalize_paths(root: Path, relnames: Iterable[str]) -> List[Path]: + """Convert relative or absolute path names to normalized absolute Paths. For each name in relnames: - - If the name is relative, it is resolved relative to `root` - - If the name is already absolute, it is resolved as-is - All paths are resolved to their canonical absolute form. + - If the name is relative, it is assumed to be relative to `root` and made absolute + All paths are then normalized to their absolute form, replacing . and .. as needed. + + Note that symlinks are NOT resolved. Args: - root: Base directory for resolving relative paths. - relnames: Iterable of path names (relative or absolute) to resolve. + root: Base directory under which relative paths are assumed to be. + relnames: Iterable of path names (relative or absolute) to normalize. Returns: - List of resolved absolute Path objects. + List of normalized absolute Path objects. """ paths: List[Path] = [] for name in relnames: - p = ( - (root / name).resolve() - if not Path(name).is_absolute() - else Path(name).resolve() - ) + p = root / name if not Path(name).is_absolute() else Path(name) + p = Path(os.path.normpath(p.absolute())) paths.append(p) return paths -def stage_data(src: Path, inputdata_root: Path, staging_root: Path) -> None: +def stage_data( + src: Path, inputdata_root: Path, staging_root: Path, check: bool = False +) -> None: """Stage a file by mirroring its path under `staging_root`. Destination path is computed by replacing the `inputdata_root` prefix of `src` @@ -145,22 +157,32 @@ def stage_data(src: Path, inputdata_root: Path, staging_root: Path) -> None: src: Source file path to stage. inputdata_root: Root directory of the inputdata tree. staging_root: Root directory where files will be staged. + check: If True, just check whether the file is already published. Raises: - RuntimeError: If `src` is a live symlink (already published), or if `src` - is outside the inputdata root, or if `src` is already under staging directory. + RuntimeError: If `src` is a live symlink pointing outside staging, or if `src` is outside + the inputdata root, or if `src` is already under staging directory. RuntimeError: If `src` is a broken symlink. FileNotFoundError: If `src` does not exist. Guardrails: - * Raise if `src` is a *live* symlink ("already published"). + * Raise if `src` is a *live* symlink to a file outside staging root ("outside staging"). * Raise if `src` is a broken symlink or is outside the inputdata root. """ - if src.is_symlink() and src.exists(): - # TODO: This should be a regular message, not an error. - raise RuntimeError("File is already published.") - if src.is_symlink() and not src.exists(): - raise RuntimeError(f"Source is a broken symlink: {src}") + if src.is_symlink(): + if not os.path.exists(src.resolve()): + raise RuntimeError(f"Source is a broken symlink: {src}") + if not src.resolve().is_relative_to(staging_root.resolve()): + raise RuntimeError( + f"Source is a symlink, but target ({src.resolve()}) is outside staging directory " + f"({staging_root})" + ) + print(f"{INDENT}File is already published and linked.") + print_can_file_be_downloaded( + can_file_be_downloaded(src.resolve(), staging_root) + ) + return + if not src.exists(): raise FileNotFoundError(f"source not found: {src}") @@ -176,9 +198,23 @@ def stage_data(src: Path, inputdata_root: Path, staging_root: Path) -> None: ) from exc dst = staging_root / rel + + if dst.exists(): + print(f"{INDENT}File is already published but NOT linked; do") + print(f"{2*INDENT}relink.py {rel}") + print(f"{INDENT}to resolve.") + print_can_file_be_downloaded( + can_file_be_downloaded(rel, staging_root) + ) + return + + if check: + print(f"{INDENT}File is not already published") + return + dst.parent.mkdir(parents=True, exist_ok=True) shutil.copy2(src, dst) - print(f"[rimport] staged {src} -> {dst}") + print(f"{INDENT}[rimport] staged {src} -> {dst}") def ensure_running_as(target_user: str, argv: list[str]) -> None: @@ -235,6 +271,50 @@ def get_staging_root() -> Path: return DEFAULT_STAGING_ROOT +def can_file_be_downloaded(file_relpath: Path, staging_root: Path, timeout: float = 10): + """Check whether a file is available for download from the CESM inputdata server. + + Sends a HEAD request to the CESM inputdata URL to verify if the file exists and is + accessible without downloading the entire file. + + Args: + file_relpath: Relative path to the file (relative to staging_root), or an absolute + path that will be made relative to staging_root. + staging_root: Root directory of the staging area, used to compute relative path + if file_relpath is absolute. + timeout: Maximum time in seconds to wait for the server response. Default is 10. + + Returns: + bool: True if the file is accessible (HTTP status 2xx or 3xx), False otherwise + (including 404, network errors, timeouts, etc.). + """ + # Get URL + if file_relpath.is_absolute(): + file_relpath = file_relpath.relative_to(staging_root) + url = os.path.join(INPUTDATA_URL, file_relpath) + + # Check whether URL can be accessed + req = Request(url, method="HEAD") + try: + with urlopen(req, timeout=timeout) as resp: + return 200 <= resp.status < 400 + except HTTPError: + # Server reached, but resource doesn't exist (404, 410, etc.) + return False + + +def print_can_file_be_downloaded(file_can_be_downloaded: bool): + """Print a message indicating whether a file is available for download. + + Args: + file_can_be_downloaded: Boolean indicating if the file can be downloaded. + """ + if file_can_be_downloaded: + print(f"{INDENT}File is available for download.") + else: + print(f"{INDENT}File is not (yet) available for download.") + + def main(argv: List[str] | None = None) -> int: """Main entry point for the rimport tool. @@ -263,7 +343,7 @@ def main(argv: List[str] | None = None) -> int: # Ensure we are running as the STAGE_OWNER account before touching the tree # Set env var RIMPORT_SKIP_USER_CHECK=1 if you prefer to run `sudox -u STAGE_OWNER rimport …` # explicitly (or for testing). - if os.getenv("RIMPORT_SKIP_USER_CHECK") != "1": + if not args.check and os.getenv("RIMPORT_SKIP_USER_CHECK") != "1": ensure_running_as(STAGE_OWNER, sys.argv) root = Path(args.inputdata).expanduser().resolve() @@ -285,17 +365,18 @@ def main(argv: List[str] | None = None) -> int: return 2 # Resolve to full paths (keep accepting absolute names too) - paths = resolve_paths(root, relnames) + paths = normalize_paths(root, relnames) staging_root = get_staging_root() # Execute the new action per file errors = 0 for p in paths: + print(f"'{p}':") try: - stage_data(p, root, staging_root) + stage_data(p, root, staging_root, args.check) except Exception as e: # pylint: disable=broad-exception-caught # General Exception keeps CLI robust for batch runs errors += 1 - print(f"rimport: error processing {p}: {e}", file=sys.stderr) + print(f"{INDENT}rimport: error processing {p}: {e}", file=sys.stderr) return 0 if errors == 0 else 1 diff --git a/tests/rimport/test_build_parser.py b/tests/rimport/test_build_parser.py index ec09c30..6213dbd 100644 --- a/tests/rimport/test_build_parser.py +++ b/tests/rimport/test_build_parser.py @@ -84,6 +84,19 @@ def test_inputdata_default(self): args = parser.parse_args(["-file", "test.txt"]) assert args.inputdata == rimport.DEFAULT_INPUTDATA_ROOT + def test_check_default(self): + """Test that --check has the correct default value.""" + parser = rimport.build_parser() + args = parser.parse_args(["-file", "test.txt"]) + assert args.check is False + + @pytest.mark.parametrize("check_flag", ["-check", "-c", "--check"]) + def test_check_arguments_accepted(self, check_flag): + """Test that all check argument flags are accepted.""" + parser = rimport.build_parser() + args = parser.parse_args(["-file", "test.txt", check_flag]) + assert args.check is True + def test_inputdata_custom(self): """Test that -inputdata can be customized.""" parser = rimport.build_parser() diff --git a/tests/rimport/test_can_file_be_downloaded.py b/tests/rimport/test_can_file_be_downloaded.py new file mode 100644 index 0000000..30fcfa4 --- /dev/null +++ b/tests/rimport/test_can_file_be_downloaded.py @@ -0,0 +1,64 @@ +""" +Tests for can_file_be_downloaded() function in rimport script. +""" + +import os +import sys +import importlib.util +from importlib.machinery import SourceFileLoader +from pathlib import Path + +import pytest + +from shared import DEFAULT_STAGING_ROOT + +# Import rimport module from file without .py extension +rimport_path = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "rimport", +) +loader = SourceFileLoader("rimport", rimport_path) +spec = importlib.util.spec_from_loader("rimport", loader) +if spec is None: + raise ImportError(f"Could not create spec for rimport from {rimport_path}") +rimport = importlib.util.module_from_spec(spec) +sys.modules["rimport"] = rimport +loader.exec_module(rimport) + +RELPATH_THAT_DOES_EXIST = os.path.join( + "share", "meshes", "ne3pg3_ESMFmesh_c221214_cdf5.asc" +) + + +class TestCanFileBeDownloaded: + """Test suite for can_file_be_downloaded() function.""" + + @pytest.mark.skipif(not os.path.exists("/glade"), reason="This test can only run on Glade") + def test_existing_file_exists(self): + """Test that the file that should exist does. If not, other tests will definitely fail.""" + file_abspath = Path(os.path.join(DEFAULT_STAGING_ROOT, RELPATH_THAT_DOES_EXIST)) + assert file_abspath.exists() + + def test_true_abspath(self): + """Test that can_file_be_downloaded() is true for an existing file given absolute path""" + file_abspath = Path(os.path.join(DEFAULT_STAGING_ROOT, RELPATH_THAT_DOES_EXIST)) + assert rimport.can_file_be_downloaded( + file_abspath, + DEFAULT_STAGING_ROOT, + ) + + def test_true_relpath(self): + """Test that can_file_be_downloaded() is true for an existing file given relative path""" + file_relpath = Path(RELPATH_THAT_DOES_EXIST) + assert rimport.can_file_be_downloaded( + file_relpath, + DEFAULT_STAGING_ROOT, + ) + + def test_false_nonexistent(self): + """Test that can_file_be_downloaded() is false for a nonexistent file""" + file_relpath = Path("weurueridniduafnea/smfnigsroerij/msdif8ernnr.nc") + assert not rimport.can_file_be_downloaded( + file_relpath, + DEFAULT_STAGING_ROOT, + ) diff --git a/tests/rimport/test_cmdline.py b/tests/rimport/test_cmdline.py index f5e7957..d209422 100644 --- a/tests/rimport/test_cmdline.py +++ b/tests/rimport/test_cmdline.py @@ -307,3 +307,171 @@ def test_list_with_comments_and_blanks(self, rimport_script, test_env, rimport_e # Verify both files were staged assert (staging_root / "file1.nc").exists() assert (staging_root / "file2.nc").exists() + + def test_prints_and_exits_for_already_published_linked_file( + self, rimport_script, test_env, rimport_env + ): + """ + Test that stage_data returns early with msg if file already published/linked. Note that the + only thing this test does that the stage_data tests don't is to check that main() correctly + passes the unresolved symlink to normalize_paths. + """ + inputdata_root = test_env["inputdata_root"] + staging_root = test_env["staging_root"] + + # Create a real file in staging and a symlink to it in inputdata + real_file = staging_root / "real_file.nc" + real_file.write_text("data") + src = inputdata_root / "link.nc" + src.symlink_to(real_file) + + # Run rimport with -file option + command = [ + sys.executable, + rimport_script, + "-file", + "link.nc", + "-inputdata", + str(inputdata_root), + ] + + result = subprocess.run( + command, + capture_output=True, + text=True, + check=False, + env=rimport_env, + ) + + # Verify success + assert result.returncode == 0, f"Command failed: {result.stderr}" + + # Verify the right message was printed + msg = "File is already published and linked" + assert msg in result.stdout + + # Verify the WRONG message was NOT printed + msg = "is already under staging directory" + assert msg not in result.stdout + + def test_error_broken_symlink(self, rimport_script, test_env, rimport_env): + """ + Test that stage_data errors with msg if file is a link w/ nonexistent target. Note that the + only thing this test does that the stage_data tests don't is to check that main() correctly + passes the unresolved symlink to stage_data. + """ + inputdata_root = test_env["inputdata_root"] + staging_root = test_env["staging_root"] + + # Create a symlink in inputdata pointing to a nonexistent file + real_file = staging_root / "real_file.nc" + src = inputdata_root / "link.nc" + src.symlink_to(real_file) + + # Run rimport + command = [ + sys.executable, + rimport_script, + "-file", + "link.nc", + "-inputdata", + str(inputdata_root), + ] + + result = subprocess.run( + command, + capture_output=True, + text=True, + check=False, + env=rimport_env, + ) + + # Verify failure + assert result.returncode != 0, f"Command unexpectedly passed: {result.stdout}" + + # Verify the right message was printed + msg = "Source is a broken symlink" + assert msg in result.stderr + + def test_error_symlink_pointing_outside_staging( + self, rimport_script, test_env, rimport_env + ): + """ + Test that stage_data errors w/ msg if file is link w/ target outside staging. Note that the + only thing this test does that the stage_data tests don't is to check that main() correctly + passes the unresolved symlink to stage_data. + """ + inputdata_root = test_env["inputdata_root"] + tmp_path = test_env["tmp_path"] + + # Create a real file outside staging and a symlink to it in inputdata + real_file = tmp_path / "real_file.nc" + real_file.write_text("data") + src = inputdata_root / "link.nc" + src.symlink_to(real_file) + + # Run rimport + command = [ + sys.executable, + rimport_script, + "-file", + "link.nc", + "-inputdata", + str(inputdata_root), + ] + + result = subprocess.run( + command, + capture_output=True, + text=True, + check=False, + env=rimport_env, + ) + + # Verify failure + assert result.returncode != 0, f"Command unexpectedly passed: {result.stdout}" + + # Verify the right message was printed + msg = "is outside staging directory" + assert msg in result.stderr + + def test_check_doesnt_copy(self, rimport_script, test_env, rimport_env): + """Test that a file is NOT copied to the staging directory if check is True.""" + inputdata_root = test_env["inputdata_root"] + staging_root = test_env["staging_root"] + + # Create a file in inputdata + test_file = inputdata_root / "test.nc" + test_file.write_text("test data") + + # Make sure --check skips ensure_running_as() + del rimport_env["RIMPORT_SKIP_USER_CHECK"] + + # Run rimport with --check option + command = [ + sys.executable, + rimport_script, + "-file", + "test.nc", + "-inputdata", + str(inputdata_root), + "--check", + ] + + result = subprocess.run( + command, + capture_output=True, + text=True, + check=False, + env=rimport_env, + ) + + # Verify success + assert result.returncode == 0, f"Command failed: {result.stderr}" + + # Verify file was not staged + staged_file = staging_root / "test.nc" + assert not staged_file.exists() + + # Verify message was printed + assert "not already published" in result.stdout diff --git a/tests/rimport/test_resolve_paths.py b/tests/rimport/test_normalize_paths.py similarity index 77% rename from tests/rimport/test_resolve_paths.py rename to tests/rimport/test_normalize_paths.py index 6a8a780..594569a 100644 --- a/tests/rimport/test_resolve_paths.py +++ b/tests/rimport/test_normalize_paths.py @@ -1,5 +1,5 @@ """ -Tests for resolve_paths() function in rimport script. +Tests for normalize_paths() function in rimport script. """ import os @@ -33,11 +33,11 @@ def fixture_root(tmp_path): class TestResolvePaths: - """Test suite for resolve_paths() function.""" + """Test suite for normalize_paths() function.""" def test_single_relative_path(self, root): """Test resolving a single relative path.""" - result = rimport.resolve_paths(root, ["file1.nc"]) + result = rimport.normalize_paths(root, ["file1.nc"]) assert len(result) == 1 assert result[0] == (root / "file1.nc").resolve() @@ -45,7 +45,7 @@ def test_single_relative_path(self, root): def test_multiple_relative_paths(self, root): """Test resolving multiple relative paths.""" relnames = ["file1.nc", "file2.nc", "file3.nc"] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 3 assert result[0] == (root / "file1.nc").resolve() @@ -55,7 +55,7 @@ def test_multiple_relative_paths(self, root): def test_nested_relative_paths(self, root): """Test resolving nested relative paths.""" relnames = ["dir1/file1.nc", "dir2/subdir/file2.nc"] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 2 assert result[0] == (root / "dir1" / "file1.nc").resolve() @@ -66,7 +66,7 @@ def test_absolute_path(self, tmp_path, root): abs_path = tmp_path / "other" / "file.nc" relnames = [str(abs_path)] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 1 assert result[0] == abs_path.resolve() @@ -76,7 +76,7 @@ def test_mixed_relative_and_absolute(self, tmp_path, root): abs_path = tmp_path / "other" / "file.nc" relnames = ["file1.nc", str(abs_path), "dir/file2.nc"] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 3 assert result[0] == (root / "file1.nc").resolve() @@ -85,7 +85,7 @@ def test_mixed_relative_and_absolute(self, tmp_path, root): def test_empty_list(self, root): """Test with empty list of names.""" - result = rimport.resolve_paths(root, []) + result = rimport.normalize_paths(root, []) assert len(result) == 0 assert result == [] @@ -93,7 +93,7 @@ def test_empty_list(self, root): def test_paths_with_spaces(self, root): """Test paths with spaces in names.""" relnames = ["file with spaces.nc", "dir with spaces/file.nc"] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 2 assert result[0] == (root / "file with spaces.nc").resolve() @@ -102,7 +102,7 @@ def test_paths_with_spaces(self, root): def test_paths_with_special_characters(self, root): """Test paths with special characters.""" relnames = ["file-name_123.nc", "dir@test/file.nc"] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 2 assert result[0] == (root / "file-name_123.nc").resolve() @@ -110,7 +110,7 @@ def test_paths_with_special_characters(self, root): def test_returns_path_objects(self, root): """Test that result contains Path objects.""" - result = rimport.resolve_paths(root, ["file.nc"]) + result = rimport.normalize_paths(root, ["file.nc"]) assert len(result) == 1 assert isinstance(result[0], Path) @@ -118,9 +118,20 @@ def test_returns_path_objects(self, root): def test_resolves_dot_and_dotdot(self, root): """Test that . and .. are resolved.""" relnames = ["./file1.nc", "dir/../file2.nc", "dir/./file3.nc"] - result = rimport.resolve_paths(root, relnames) + result = rimport.normalize_paths(root, relnames) assert len(result) == 3 assert result[0] == (root / "file1.nc").resolve() assert result[1] == (root / "file2.nc").resolve() assert result[2] == (root / "dir" / "file3.nc").resolve() + + def test_abs_symlink_unchanged(self, root): + """Test that an absolute path to a symlink is unchanged""" + # Create a real file in staging and a symlink to it in inputdata + real_file = root / "real_file.nc" + real_file.write_text("data") + symlink = root / "link.nc" + symlink.symlink_to(real_file) + + result = rimport.normalize_paths(root, [symlink]) + assert result[0] == symlink diff --git a/tests/rimport/test_stage_data.py b/tests/rimport/test_stage_data.py index f8f607a..d24e769 100644 --- a/tests/rimport/test_stage_data.py +++ b/tests/rimport/test_stage_data.py @@ -6,6 +6,7 @@ import sys import importlib.util from importlib.machinery import SourceFileLoader +from unittest.mock import patch import pytest @@ -56,6 +57,22 @@ def test_copies_file_to_staging(self, inputdata_root, staging_root): assert dst.exists() assert dst.read_text() == "data content" + def test_check_doesnt_copy(self, inputdata_root, staging_root, capsys): + """Test that a file is NOT copied to the staging directory if check is True""" + # Create file in inputdata root + src = inputdata_root / "file.nc" + src.write_text("data content") + + # Check the file + rimport.stage_data(src, inputdata_root, staging_root, check=True) + + # Verify file was NOT copied to staging + dst = staging_root / "file.nc" + assert not dst.exists() + + # Verify message was printed + assert "not already published" in capsys.readouterr().out.strip() + def test_preserves_directory_structure(self, inputdata_root, staging_root): """Test that directory structure is preserved in staging.""" # Create nested file in inputdata root @@ -71,18 +88,120 @@ def test_preserves_directory_structure(self, inputdata_root, staging_root): assert dst.exists() assert dst.read_text() == "nested data" - def test_raises_error_for_live_symlink( + def test_prints_live_symlink_already_published_not_downloadable( + self, inputdata_root, staging_root, capsys + ): + """ + Test that staging a live, already-published symlink prints a message and returns + immediately without copying anything. Should say it's not available for download. + """ + # Create a real file in staging and a symlink to it in inputdata + real_file = staging_root / "real_file.nc" + real_file.write_text("data") + src = inputdata_root / "link.nc" + src.symlink_to(real_file) + + # Mock shutil.copy2 to verify it's never called + with patch("shutil.copy2") as mock_copy: + # Should print message for live symlink and return early + rimport.stage_data(src, inputdata_root, staging_root) + + # Verify the right messages were printed + stdout = capsys.readouterr().out.strip() + msg = "File is already published and linked" + assert msg in stdout + msg = "File is not (yet) available for download" + assert msg in stdout + + # Verify the WRONG message was NOT printed + msg = "is already under staging directory" + assert msg not in stdout + + # Verify that shutil.copy2 was never called (function returned early) + mock_copy.assert_not_called() + + def test_prints_live_symlink_already_published_is_downloadable( + self, inputdata_root, staging_root, capsys + ): + """ + Like test_prints_live_symlink_already_published_not_downloadable, but mocks + can_file_be_downloaded() to test "is available for download" message. + """ + # Create a real file in staging and a symlink to it in inputdata + real_file = staging_root / "real_file.nc" + real_file.write_text("data") + src = inputdata_root / "link.nc" + src.symlink_to(real_file) + + # Mock shutil.copy2 to verify it's never called + with patch("shutil.copy2") as mock_copy: + # Mock can_file_be_downloaded to return True + with patch("rimport.can_file_be_downloaded", return_value=True): + # Should print message for live symlink and return early + rimport.stage_data(src, inputdata_root, staging_root) + + # Verify that shutil.copy2 was never called (function returned early) + mock_copy.assert_not_called() + + # Verify the right messages were printed + stdout = capsys.readouterr().out.strip() + msg = "File is already published and linked" + assert msg in stdout + msg = "File is available for download" + assert msg in stdout + + # Verify the WRONG message was NOT printed + msg = "is already under staging directory" + assert msg not in stdout + + def test_prints_published_but_not_linked( + self, inputdata_root, staging_root, capsys + ): + """ + Tests printed message for when a file has been published (copied to staging root) but not + yet linked (inputdata version replaced with symlink to staging version). + """ + # Create a real file in staging AND in inputdata + filename = "real_file.nc" + staged = staging_root / filename + staged.write_text("data") + inputdata = inputdata_root / filename + inputdata.write_text("data") + + # Mock shutil.copy2 to verify it's never called + with patch("shutil.copy2") as mock_copy: + # Mock can_file_be_downloaded to return True + with patch("rimport.can_file_be_downloaded", return_value=True): + # Should print message for live symlink and return early + rimport.stage_data(inputdata, inputdata_root, staging_root) + + # Verify that shutil.copy2 was never called (function returned early) + mock_copy.assert_not_called() + + # Verify the right messages were printed or not + stdout = capsys.readouterr().out.strip() + msg = "File is already published and linked" + assert msg not in stdout + msg = "File is already published but NOT linked; do" + assert msg in stdout + msg = "File is available for download" + assert msg in stdout + + def test_raises_error_for_live_symlink_pointing_somewhere_other_than_staging( self, tmp_path, inputdata_root, staging_root ): - """Test that staging a live symlink raises RuntimeError.""" - # Create a real file and a symlink to it + """ + Test that staging a live symlink that points to somewhere other than staging directory + raises RuntimeError with accurate message. + """ + # Create a real file outside the staging directory and a symlink to it real_file = tmp_path / "real_file.nc" real_file.write_text("data") src = inputdata_root / "link.nc" src.symlink_to(real_file) # Should raise RuntimeError for live symlink - with pytest.raises(RuntimeError, match="already published"): + with pytest.raises(RuntimeError, match="outside staging directory"): rimport.stage_data(src, inputdata_root, staging_root) def test_raises_error_for_broken_symlink( @@ -94,7 +213,7 @@ def test_raises_error_for_broken_symlink( src.symlink_to(tmp_path / "nonexistent.nc") # Should raise RuntimeError for broken symlink - with pytest.raises(RuntimeError, match="broken symlink"): + with pytest.raises(RuntimeError, match="Source is a broken symlink"): rimport.stage_data(src, inputdata_root, staging_root) def test_raises_error_for_nonexistent_file(self, inputdata_root, staging_root): @@ -136,7 +255,7 @@ def test_raises_error_for_file_outside_inputdata_root_with_special_str( rimport.stage_data(src, inputdata_root, staging_root) def test_raises_error_for_already_published_file( - self, tmp_path, inputdata_root, staging_root + self, inputdata_root, staging_root ): """Test that staging an already published file raises RuntimeError.""" # Create a file in staging_root