From 711493ef1a88e937bf927252f75261beb12bbd16 Mon Sep 17 00:00:00 2001 From: Bastien Orivel Date: Wed, 1 Apr 2026 18:24:21 +0200 Subject: [PATCH] Percent encode artifact names in fetch-content Previously, trying to use fetch-content for an artifact that had a space in it would result in errors like `Download failed: URL can't contain control characters. '[...]' (found at least ' ')` This commit changes fetch-content so we encode the artifact name when building the download URL instead of putting it verbatim in the URL. Because we're later reusing the URL to get the base name to know which file to write, we now decode the basename so the resulting file doesn't contain percent encoded characters. Before: ``` $ export MOZ_FETCHES='[{"task":"FNpL6lwoTVyuWE3wgNeuUw","artifact":"public/Twilight Princess-0.2.3.apworld","extract":false}]' $ uv run python3 src/taskgraph/run-task/fetch-content task-artifacts -d /tmp/fetch-test attempt 1/5 Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight Princess-0.3.0.apworld to /tmp/fetch-test/Twilight Princess-0.3.0.apworld Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight Princess-0.3.0.apworld Download failed: URL can't contain control characters. '/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight Princess-0.3.0.apworld' (found at least ' ') sleeping for 60.00s (attempt 1/5) ``` After: ``` $ export MOZ_FETCHES='[{"task":"FNpL6lwoTVyuWE3wgNeuUw","artifact":"public/Twilight Princess-0.2.3.apworld","extract":false}]' $ uv run python3 src/taskgraph/run-task/fetch-content task-artifacts -d /tmp/fetch-test attempt 1/5 Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight%20Princess-0.3.0.apworld to /tmp/fetch-test/Twilight Princess-0.3.0.apworld Downloading https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight%20Princess-0.3.0.apworld https://taskcluster.bananium.fr/api/queue/v1/task/DBFKDCtCSi6gg35xPqZHOA/artifacts/public/Twilight%20Princess-0.3.0.apworld resolved to 155321 bytes with sha256 77c8ccca3da7b040b94a7c8b4c68dc5234556dbc1c5b51f65afc19655f6d00ab in 0.486s PERFHERDER_DATA: {"framework": {"name": "build_metrics"}, "suites": [{"name": "fetch_content", "value": 0.4895111569785513, "lowerIsBetter": true, "shouldAlert": false, "subtests": []}]} ``` As another workaround, users could percent encode the artifact name in the fetch config but that would result with the filename on disk still having the percent encoded characters in it and be confusing because the artifact in the config wouldn't match what the task actually has. i.e: MOZ_FETCHES='[{"task":"FNpL6lwoTVyuWE3wgNeuUw","artifact":"public/Twilight%20Princess-0.2.3.apworld","extract":false}]' --- src/taskgraph/run-task/fetch-content | 7 +-- test/test_scripts_fetch_content.py | 80 ++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/src/taskgraph/run-task/fetch-content b/src/taskgraph/run-task/fetch-content index 596723029..85aaf7463 100755 --- a/src/taskgraph/run-task/fetch-content +++ b/src/taskgraph/run-task/fetch-content @@ -626,7 +626,7 @@ def fetch_and_extract(url, dest_dir, extract=True, sha256=None, size=None): the destination directory. """ - basename = urllib.parse.urlparse(url).path.split("/")[-1] + basename = urllib.parse.unquote(urllib.parse.urlparse(url).path.split("/")[-1]) dest_path = dest_dir / basename download_to_path(url, dest_path, sha256=sha256, size=size) @@ -920,16 +920,17 @@ def command_task_artifacts(args): sha256 = None if fetch.get("verify-hash"): sha256 = get_hash(fetch, root_url) + encoded_artifact = urllib.parse.quote(fetch["artifact"]) if fetch["artifact"].startswith("public/"): path = "task/{task}/artifacts/{artifact}".format( - task=fetch["task"], artifact=fetch["artifact"] + task=fetch["task"], artifact=encoded_artifact ) url = api(root_url, "queue", "v1", path) else: url = ("{proxy_url}/api/queue/v1/task/{task}/artifacts/{artifact}").format( proxy_url=os.environ["TASKCLUSTER_PROXY_URL"], task=fetch["task"], - artifact=fetch["artifact"], + artifact=encoded_artifact, ) downloads.append((url, extdir, fetch["extract"], sha256)) diff --git a/test/test_scripts_fetch_content.py b/test/test_scripts_fetch_content.py index ea7efe44f..9ebf7943d 100644 --- a/test/test_scripts_fetch_content.py +++ b/test/test_scripts_fetch_content.py @@ -1,3 +1,4 @@ +import json import os import pathlib import urllib.request @@ -97,6 +98,85 @@ def getheader(field): raise +@pytest.mark.parametrize( + "artifact,expected_url_suffix", + ( + pytest.param( + "public/foo.apworld", + "task/abc123/artifacts/public/foo.apworld", + id="simple artifact name", + ), + pytest.param( + "public/Twilight Princess-0.2.3.apworld", + "task/abc123/artifacts/public/Twilight%20Princess-0.2.3.apworld", + id="artifact name with space", + ), + ), +) +def test_command_task_artifacts_url_encoding( + monkeypatch, + tmp_path, + fetch_content_mod, + artifact, + expected_url_suffix, +): + fetches = [{"task": "abc123", "artifact": artifact, "extract": False}] + monkeypatch.setenv("MOZ_FETCHES", json.dumps(fetches)) + monkeypatch.setenv("TASKCLUSTER_ROOT_URL", "https://tc.example.com") + + captured_urls = [] + + def mock_fetch_urls(downloads): + for url, dest_dir, extract, sha256 in downloads: + captured_urls.append(url) + + monkeypatch.setattr(fetch_content_mod, "fetch_urls", mock_fetch_urls) + + args = MagicMock() + args.dest = str(tmp_path) + fetch_content_mod.command_task_artifacts(args) + + assert len(captured_urls) == 1 + url = captured_urls[0] + assert url == f"https://tc.example.com/api/queue/v1/{expected_url_suffix}" + + +@pytest.mark.parametrize( + "url,expected_dest_filename", + ( + pytest.param( + "https://tc.example.com/api/queue/v1/task/abc/artifacts/public/foo.apworld", + "foo.apworld", + id="simple", + ), + pytest.param( + "https://tc.example.com/api/queue/v1/task/abc/artifacts/public/Twilight%20Princess-0.2.3.apworld", + "Twilight Princess-0.2.3.apworld", + id="url-encoded space", + ), + ), +) +def test_fetch_and_extract_dest_filename( + monkeypatch, + tmp_path, + fetch_content_mod, + url, + expected_dest_filename, +): + downloaded_to = [] + + def mock_download_to_path(url, path, sha256=None, size=None): + downloaded_to.append(path) + path.touch() + + monkeypatch.setattr(fetch_content_mod, "download_to_path", mock_download_to_path) + + fetch_content_mod.fetch_and_extract(url, tmp_path, extract=False) + + assert len(downloaded_to) == 1 + assert downloaded_to[0].name == expected_dest_filename + + @pytest.mark.parametrize( "expected,orig,dest,strip_components,add_prefix", [