From d4cea676e998b291961f0f0b953a7de7b7a35940 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:05:54 +0900 Subject: [PATCH 01/12] wip wip --- python/private/pypi/parse_simpleapi_html.bzl | 24 ++- python/private/pypi/simpleapi_download.bzl | 175 +++++++++++-------- 2 files changed, 123 insertions(+), 76 deletions(-) diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl index 563130791e..78669d5ff8 100644 --- a/python/private/pypi/parse_simpleapi_html.bzl +++ b/python/private/pypi/parse_simpleapi_html.bzl @@ -16,16 +16,20 @@ Parse SimpleAPI HTML in Starlark. """ +load("//python/private:normalize_name.bzl", "normalize_name") load(":version_from_filename.bzl", "version_from_filename") -def parse_simpleapi_html(*, content): +def parse_simpleapi_html(*, content, parse_index = False): """Get the package URLs for given shas by parsing the Simple API HTML. Args: - content(str): The Simple API HTML content. + content: {type}`str` The Simple API HTML content. + parse_index: {type}`bool` whether to parse the content as the index page of the PyPI index, + e.g. the `https://pypi.org/simple/`. This only has the URLs for the individual package. Returns: - A list of structs with: + If it is the index page, return the map of package to URL it can be queried from. + Otherwise, a list of structs with: * filename: {type}`str` The filename of the artifact. * version: {type}`str` The version of the artifact. * url: {type}`str` The URL to download the artifact. @@ -59,6 +63,8 @@ def parse_simpleapi_html(*, content): # https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api fail("Unsupported API version: {}".format(api_version)) + packages = {} + # 2. 
Iterate using find() to avoid huge list allocations from .split("": { + # "": "", + # } + # } + # } + download = read_simpleapi( + ctx = ctx, + attr = attr, + url = urllib.strip_empty_path_segments("{index_url}/".format( + index_url = index_url, + )), + parse_index = True, + versions = None, + block = block, + allow_fail = False, + **kwargs + ) + if hasattr(download, "wait"): + downloads[index_url] = download + else: + results[index_url] = download + + for index_url, download in downloads.items(): + results[index_url] = download.wait() + + found_on_index = {} + for index_url, result in results.items(): + sources = [pkg for pkg in attr.sources if pkg not in found_on_index] + + available_packages = result.output + sources = [pkg for pkg in sources if normalize_name(pkg) in available_packages] + found_on_index.update({ + pkg: urllib.absolute_url(index_url, available_packages[normalize_name(pkg)]) + for pkg in sources + }) + + failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index] if failed_sources: pkg_index_urls = { pkg: index_url_overrides.get( @@ -148,7 +191,7 @@ def simpleapi_download( _fail( """ -Failed to download metadata of the following packages from urls: +Failed to find packages on PyPI of the following packages from urls: {pkg_index_urls} If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={failed_sources}' to your 'pip.parse' call. 
@@ -159,22 +202,9 @@ If you would like to skip downloading metadata for these packages please add 'si ) return None - if warn_overrides: - index_url_overrides = { - pkg: found_on_index[pkg] - for pkg in attr.sources - if found_on_index[pkg] != attr.index_url - } - - if index_url_overrides: - # buildifier: disable=print - print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format( - render.dict(index_url_overrides), - )) - - return contents + return {normalize_name(pkg): url for pkg, url in found_on_index.items()} -def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download_kwargs): +def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, parse_index = False, **download_kwargs): """Read SimpleAPI. Args: @@ -189,6 +219,7 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download cache: {type}`struct` the `pypi_cache` instance. versions: {type}`list[str] The versions that have been requested. get_auth: A function to get auth information. Used in tests. + parse_index: TODO **download_kwargs: Any extra params to ctx.download. Note that output and auth will be passed for you. 
@@ -242,6 +273,7 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download output = output, cache = cache, cache_key = cache_key, + parse_index = parse_index, ), ) @@ -251,15 +283,16 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, **download output = output, cache = cache, cache_key = cache_key, + parse_index = parse_index, ) -def _read_index_result(ctx, *, result, output, cache, cache_key): +def _read_index_result(ctx, *, result, output, cache, cache_key, parse_index): if not result.success: return struct(success = False) content = ctx.read(output) - output = parse_simpleapi_html(content = content) + output = parse_simpleapi_html(content = content, parse_index = parse_index) if output: cache.setdefault(cache_key, output) return struct(success = True, output = output) From 0107a54801377f16ea1caf6e206076d201d13aec Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:09:51 +0900 Subject: [PATCH 02/12] add facts --- python/private/pypi/pypi_cache.bzl | 51 +++++++++++++++ python/private/pypi/simpleapi_download.bzl | 73 ++++++++-------------- 2 files changed, 76 insertions(+), 48 deletions(-) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 28c6cbeafb..747bf6a7a1 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -122,6 +122,15 @@ def _filter_packages(dists, requested_versions): if dists == None or not requested_versions: return dists + if type(dists) == "dict": + pkgs = requested_versions + filtered = { + pkg: url + for pkg, url in dists.items() + if pkg in pkgs + } + return filtered + sha256s_by_version = {} whls = {} sdists = {} @@ -193,6 +202,12 @@ def _get_from_facts(facts, known_facts, index_url, requested_versions, facts_ver # cannot trust known facts, different version that we know how to parse return None + if type(requested_versions) == "dict": + return 
_filter_packages( + dists = known_facts.get("index_urls", {}).get(index_url, {}), + requested_versions = requested_versions, + ) + known_sources = {} root_url, _, distribution = index_url.rstrip("/").rpartition("/") @@ -266,10 +281,46 @@ def _store_facts(facts, fact_version, index_url, value): facts["fact_version"] = fact_version + if type(value) == "dict": + # facts: { + # "index_urls": { + # "": { + # "": "", + # }, + # }, + # }, + for pkg, url in value.items(): + facts.setdefault("index_urls", {}).setdefault(index_url, {}).setdefault(pkg, url) + return value + root_url, _, distribution = index_url.rstrip("/").rpartition("/") distribution = distribution.rstrip("/") root_url = root_url.rstrip("/") + # The schema is + # facts: { + # "dist_hashes": { + # "": { + # "": { + # "": "", + # }, + # }, + # }, + # "dist_filenames": { + # "": { + # "": { + # "": "", # if it is different from the URL + # }, + # }, + # }, + # "dist_yanked": { + # "": { + # "": { + # "": "", # if the package is yanked + # }, + # }, + # }, + # }, for sha256, d in (value.sdists | value.whls).items(): facts.setdefault("dist_hashes", {}).setdefault(root_url, {}).setdefault(distribution, {}).setdefault(d.url, sha256) if not d.url.endswith(d.filename): diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index f15d835a48..a1dd447e1e 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -75,6 +75,11 @@ def simpleapi_download( for p, i in (attr.index_url_overrides or {}).items() } + sources = { + normalize_name(pkg): versions + for pkg, versions in attr.sources.items() + } + # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes # to replicate how `pip` would handle this case. 
contents = {} @@ -83,8 +88,9 @@ def simpleapi_download( dist_urls = _get_dist_urls( ctx, - index_urls, - index_url_overrides, + index_urls = index_urls, + index_url_overrides = index_url_overrides, + sources = sources, read_simpleapi = read_simpleapi, cache = cache, get_auth = get_auth, @@ -95,11 +101,6 @@ def simpleapi_download( ctx.report_progress("Fetch package lists from PyPI index") - sources = { - normalize_name(pkg): versions - for pkg, versions in attr.sources.items() - } - downloads = {} contents = {} for pkg, url in dist_urls.items(): @@ -125,29 +126,10 @@ def simpleapi_download( return contents -def _get_dist_urls(ctx, index_urls, index_url_overrides, read_simpleapi, *, attr, block, _fail = fail, **kwargs): - if index_url_overrides: - first_index = index_urls[0] - return { - pkg: urllib.strip_empty_path_segments("{index_url}/{distribution}/".format( - index_url = index_url_overrides.get(normalize_name(pkg), first_index).rstrip("/"), - distribution = pkg, - )) - for pkg in attr.sources - } - +def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs): downloads = {} results = {} for index_url in index_urls: - # TODO @aignas 2026-03-20: pull from the cache/facts - # we can store the following schema: - # facts: { - # "index_urls": { - # "": { - # "": "", - # } - # } - # } download = read_simpleapi( ctx = ctx, attr = attr, @@ -155,7 +137,7 @@ def _get_dist_urls(ctx, index_urls, index_url_overrides, read_simpleapi, *, attr index_url = index_url, )), parse_index = True, - versions = None, + versions = {pkg: None for pkg in sources}, block = block, allow_fail = False, **kwargs @@ -170,25 +152,25 @@ def _get_dist_urls(ctx, index_urls, index_url_overrides, read_simpleapi, *, attr found_on_index = {} for index_url, result in results.items(): - sources = [pkg for pkg in attr.sources if pkg not in found_on_index] - - available_packages = result.output - sources = [pkg for pkg in sources if 
normalize_name(pkg) in available_packages] + # Filter out the things that we have already found found_on_index.update({ - pkg: urllib.absolute_url(index_url, available_packages[normalize_name(pkg)]) + pkg: urllib.absolute_url(index_url, result.output[pkg]) for pkg in sources }) + sources = [ + pkg + for pkg in sources + if pkg not in found_on_index + ] - failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index] - if failed_sources: + if sources: pkg_index_urls = { - pkg: index_url_overrides.get( - normalize_name(pkg), - index_urls, - ) - for pkg in failed_sources + pkg: index_url_overrides.get(pkg, index_urls) + for pkg in sources } + # TODO @aignas 2026-03-20: we haven't found these pkgs on the index, so we can + # print a warning, or we can fallback to PyPI. For now let's fail _fail( """ Failed to find packages on PyPI of the following packages from urls: @@ -196,13 +178,13 @@ Failed to find packages on PyPI of the following packages from urls: If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={failed_sources}' to your 'pip.parse' call. """.format( - pkg_index_urls = render.dict(pkg_index_urls), - failed_sources = render.list(failed_sources), + pkg_index_urls = render.dict(dict(sorted(pkg_index_urls.items()))), + failed_sources = render.list(sources), ), ) return None - return {normalize_name(pkg): url for pkg, url in found_on_index.items()} + return found_on_index def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, parse_index = False, **download_kwargs): """Read SimpleAPI. @@ -227,11 +209,6 @@ def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, parse_inde A similar object to what `download` would return except that in result.out will be the parsed simple api contents. 
""" - # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for - # the whl location and we cannot handle multiple URLs at once by passing - # them to ctx.download if we want to correctly handle the relative URLs. - # TODO: Add a test that env subbed index urls do not leak into the lock file. - real_url = urllib.strip_empty_path_segments(envsubst(url, attr.envsubst, ctx.getenv)) cache_key = (url, real_url, versions) From 0492f31707349406229ffc12109dd6eab82bcea5 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 20 Mar 2026 13:48:46 +0900 Subject: [PATCH 03/12] finish POC --- python/private/pypi/pypi_cache.bzl | 11 ++++++----- python/private/pypi/simpleapi_download.bzl | 4 +--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 747bf6a7a1..bc92de0bde 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -89,6 +89,9 @@ def _pypi_cache_get(self, key): if not cached and versions: # Could not get from in-memory, read from lockfile facts cached = self._facts.get(index_url, versions) + else: + # TODO @aignas 2026-03-20: add a test here + self._facts.setdefault(index_url, cached) return cached @@ -123,13 +126,11 @@ def _filter_packages(dists, requested_versions): return dists if type(dists) == "dict": - pkgs = requested_versions - filtered = { + return { pkg: url for pkg, url in dists.items() - if pkg in pkgs + if pkg in requested_versions } - return filtered sha256s_by_version = {} whls = {} @@ -290,7 +291,7 @@ def _store_facts(facts, fact_version, index_url, value): # }, # }, for pkg, url in value.items(): - facts.setdefault("index_urls", {}).setdefault(index_url, {}).setdefault(pkg, url) + facts.setdefault("index_urls", {}).setdefault(index_url, {})[pkg] = url return value root_url, _, distribution = index_url.rstrip("/").rpartition("/") diff --git 
a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index a1dd447e1e..b8caacef82 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -82,13 +82,11 @@ def simpleapi_download( # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes # to replicate how `pip` would handle this case. - contents = {} - index_urls = [attr.index_url] + attr.extra_index_urls read_simpleapi = read_simpleapi or _read_simpleapi dist_urls = _get_dist_urls( ctx, - index_urls = index_urls, + index_urls = [attr.index_url] + attr.extra_index_urls, index_url_overrides = index_url_overrides, sources = sources, read_simpleapi = read_simpleapi, From b77854d0e28543f54b3763e05d5ae059581f152f Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 20 Mar 2026 16:10:07 +0900 Subject: [PATCH 04/12] remove a warning --- python/private/pypi/simpleapi_download.bzl | 31 +++++----------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index b8caacef82..a73dc4538a 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -19,7 +19,6 @@ A file that houses private functions used in the `bzlmod` extension with the sam load("//python/private:auth.bzl", _get_auth = "get_auth") load("//python/private:envsubst.bzl", "envsubst") load("//python/private:normalize_name.bzl", "normalize_name") -load("//python/private:text_util.bzl", "render") load(":parse_simpleapi_html.bzl", "parse_simpleapi_html") load(":urllib.bzl", "urllib") @@ -150,37 +149,19 @@ def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simple found_on_index = {} for index_url, result in results.items(): - # Filter out the things that we have already found - found_on_index.update({ - pkg: urllib.absolute_url(index_url, 
result.output[pkg]) - for pkg in sources - }) sources = [ pkg for pkg in sources if pkg not in found_on_index ] - if sources: - pkg_index_urls = { - pkg: index_url_overrides.get(pkg, index_urls) + # Filter out the things that we have already found + found_on_index.update({ + pkg: urllib.absolute_url(index_url, result.output[pkg]) for pkg in sources - } - - # TODO @aignas 2026-03-20: we haven't found these pkgs on the index, so we can - # print a warning, or we can fallback to PyPI. For now let's fail - _fail( - """ -Failed to find packages on PyPI of the following packages from urls: -{pkg_index_urls} - -If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={failed_sources}' to your 'pip.parse' call. -""".format( - pkg_index_urls = render.dict(dict(sorted(pkg_index_urls.items()))), - failed_sources = render.list(sources), - ), - ) - return None + # TODO @aignas 2026-03-20: add a test here + if index_url_overrides.get(pkg, index_url) + }) return found_on_index From cb97d74fd86804ec4ab629b90f92e212ac947f46 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 20 Mar 2026 16:21:02 +0900 Subject: [PATCH 05/12] add a test --- python/private/pypi/pypi_cache.bzl | 1 - python/private/pypi/simpleapi_download.bzl | 9 +++--- tests/pypi/pypi_cache/pypi_cache_tests.bzl | 33 ++++++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index bc92de0bde..2d2418c6ae 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -90,7 +90,6 @@ def _pypi_cache_get(self, key): # Could not get from in-memory, read from lockfile facts cached = self._facts.get(index_url, versions) else: - # TODO @aignas 2026-03-20: add a test here self._facts.setdefault(index_url, cached) return cached diff --git a/python/private/pypi/simpleapi_download.bzl 
b/python/private/pypi/simpleapi_download.bzl index a73dc4538a..de12b9d675 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -73,16 +73,17 @@ def simpleapi_download( normalize_name(p): i for p, i in (attr.index_url_overrides or {}).items() } - sources = { normalize_name(pkg): versions for pkg, versions in attr.sources.items() } - # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes - # to replicate how `pip` would handle this case. read_simpleapi = read_simpleapi or _read_simpleapi + ctx.report_progress("Fetch package lists from PyPI index") + + # NOTE: we are not merging results from multiple indexes to replicate how `pip` would + # handle this case. What we do is we select a particular index to download the packages dist_urls = _get_dist_urls( ctx, index_urls = [attr.index_url] + attr.extra_index_urls, @@ -96,7 +97,7 @@ def simpleapi_download( _fail = _fail, ) - ctx.report_progress("Fetch package lists from PyPI index") + ctx.report_progress("Fetching package URLs from PyPI index") downloads = {} contents = {} diff --git a/tests/pypi/pypi_cache/pypi_cache_tests.bzl b/tests/pypi/pypi_cache/pypi_cache_tests.bzl index 7b6168ce7b..3cf01c7450 100644 --- a/tests/pypi/pypi_cache/pypi_cache_tests.bzl +++ b/tests/pypi/pypi_cache/pypi_cache_tests.bzl @@ -155,6 +155,39 @@ def _test_pypi_cache_writes_to_facts(env): "fact_version": "v1", # Facts version }) + # When we get the other items cached in memory, they get written to facts + got = cache.get((key[0], key[1], ["1.1.0"])) + got.whls().contains_exactly({ + "sha_whl_2": fake_result.whls["sha_whl_2"], + }) + got.sdists().contains_exactly({}) + got.sha256s_by_version().contains_exactly({ + "1.1.0": fake_result.sha256s_by_version["1.1.0"], + }) + + # Then when we get facts at the end + cache.get_facts().contains_exactly({ + "dist_hashes": { + # We are not using the real index URL, because we may have credentials in here + 
"https://{PYPI_INDEX_URL}": { + "pkg": { + "https://pypi.org/files/pkg-1.0.0-py3-none-any.whl": "sha_whl", + "https://pypi.org/files/pkg-1.0.0.tar.gz": "sha_sdist", + "https://pypi.org/files/pkg-1.1.0-py3-none-any.whl": "sha_whl_2", + }, + }, + }, + "dist_yanked": { + "https://{PYPI_INDEX_URL}": { + "pkg": { + "sha_sdist": "", + "sha_whl": "Security issue", + }, + }, + }, + "fact_version": "v1", # Facts version + }) + _tests.append(_test_pypi_cache_writes_to_facts) def _test_pypi_cache_reads_from_facts(env): From 7e71a5898037d6aee581bf039f8019c11bcbc836 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 20 Mar 2026 16:25:15 +0900 Subject: [PATCH 06/12] add a test --- .../parse_simpleapi_html_tests.bzl | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl index f72d61371c..933a0783f2 100644 --- a/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl +++ b/tests/pypi/parse_simpleapi_html/parse_simpleapi_html_tests.bzl @@ -42,6 +42,29 @@ def _generate_html(*items): ]), ) +def _test_index(env): + # buildifier: disable=unsorted-dict-items + tests = [ + ( + [ + struct(attrs = ['href="/simple/foo/"'], filename = "foo"), + struct(attrs = ['href="./b-ar/"'], filename = "b-._.-aR"), + ], + { + "b_ar": "./b-ar/", + "foo": "/simple/foo/", + }, + ), + ] + + for (input, want) in tests: + html = _generate_html(*input) + got = parse_simpleapi_html(content = html, parse_index = True) + + env.expect.that_dict(got).contains_exactly(want) + +_tests.append(_test_index) + def _test_sdist(env): # buildifier: disable=unsorted-dict-items tests = [ From 1cd90d76c1fac9758776899e119bd83a9b795c23 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sat, 21 Mar 2026 23:42:25 +0900 Subject: [PATCH 07/12] fix index override handling and fix a few tests 
--- python/private/pypi/parse_simpleapi_html.bzl | 2 +- python/private/pypi/simpleapi_download.bzl | 38 ++-- .../simpleapi_download_tests.bzl | 162 +++++++----------- 3 files changed, 82 insertions(+), 120 deletions(-) diff --git a/python/private/pypi/parse_simpleapi_html.bzl b/python/private/pypi/parse_simpleapi_html.bzl index 78669d5ff8..83b006ffd2 100644 --- a/python/private/pypi/parse_simpleapi_html.bzl +++ b/python/private/pypi/parse_simpleapi_html.bzl @@ -132,7 +132,7 @@ def parse_simpleapi_html(*, content, parse_index = False): else: sdists[sha256] = dist - if packages: + if parse_index: return packages return struct( diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index de12b9d675..55e11e6fd6 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -110,6 +110,7 @@ def simpleapi_download( versions = sources[pkg], get_auth = get_auth, block = not parallel_download, + parse_index = False, ) if hasattr(result, "wait"): # We will process it in a separate loop: @@ -127,6 +128,10 @@ def simpleapi_download( def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs): downloads = {} results = {} + for extra in index_url_overrides.values(): + if extra not in index_urls: + index_urls.append(extra) + for index_url in index_urls: download = read_simpleapi( ctx = ctx, @@ -137,7 +142,6 @@ def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simple parse_index = True, versions = {pkg: None for pkg in sources}, block = block, - allow_fail = False, **kwargs ) if hasattr(download, "wait"): @@ -150,23 +154,27 @@ def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simple found_on_index = {} for index_url, result in results.items(): - sources = [ - pkg - for pkg in sources - if pkg not in found_on_index - ] - - # Filter out the things that we have already found - 
found_on_index.update({ - pkg: urllib.absolute_url(index_url, result.output[pkg]) - for pkg in sources - # TODO @aignas 2026-03-20: add a test here - if index_url_overrides.get(pkg, index_url) - }) + for pkg in sources: + if pkg in found_on_index: + # We have already found the package, skip + continue + + if index_url_overrides.get(pkg, index_url) != index_url: + # we should not use this index for the package + continue + + if not hasattr(result.output, "get"): + fail(result.output) + + found = result.output.get(pkg) + if not found: + continue + + found_on_index[pkg] = urllib.absolute_url(index_url, found) return found_on_index -def _read_simpleapi(ctx, url, attr, cache, versions, get_auth = None, parse_index = False, **download_kwargs): +def _read_simpleapi(ctx, url, attr, cache, versions, parse_index, get_auth = None, **download_kwargs): """Read SimpleAPI. Args: diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 9a6b7ca5af..8d8a26dd4e 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -23,26 +23,30 @@ _tests = [] def _test_simple(env): calls = [] - def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, allow_fail): - _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable - env.expect.that_bool(block).equals(False) - env.expect.that_bool(allow_fail).equals(True) - calls.append(url) - if "foo" in url and "main" in url: + def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, parse_index): + if parse_index: return struct( - output = "", - success = False, - ) - else: - return struct( - output = struct( - sdists = {"deadbeef": url.strip("/").split("/")[-1]}, - whls = {"deadb33f": url.strip("/").split("/")[-1]}, - sha256s_by_version = {"fizz": url.strip("/").split("/")[-1]}, - ), success = True, + output = { + "bar": "/bar/", + "baz": "/baz/", 
+ } if "main" in url else { + "foo": "/foo/", + }, ) + _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable + env.expect.that_bool(block).equals(False) + calls.append(url) + return struct( + output = struct( + sdists = {"deadbeef": url.strip("/").split("/")[-1]}, + whls = {"deadb33f": url.strip("/").split("/")[-1]}, + sha256s_by_version = {"fizz": url.strip("/").split("/")[-1]}, + ), + success = True, + ) + contents = simpleapi_download( ctx = struct( getenv = {}.get, @@ -50,8 +54,8 @@ def _test_simple(env): ), attr = struct( index_url_overrides = {}, - index_url = "main", - extra_index_urls = ["extra"], + index_url = "https://main.com", + extra_index_urls = ["https://extra.com"], sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], ), @@ -61,26 +65,25 @@ def _test_simple(env): ) env.expect.that_collection(calls).contains_exactly([ - "extra/foo/", - "main/bar/", - "main/baz/", - "main/foo/", + "https://extra.com/foo/", + "https://main.com/bar/", + "https://main.com/baz/", ]) env.expect.that_dict(contents).contains_exactly({ "bar": struct( - index_url = "main/bar/", + index_url = "https://main.com/bar/", sdists = {"deadbeef": "bar"}, sha256s_by_version = {"fizz": "bar"}, whls = {"deadb33f": "bar"}, ), "baz": struct( - index_url = "main/baz/", + index_url = "https://main.com/baz/", sdists = {"deadbeef": "baz"}, sha256s_by_version = {"fizz": "baz"}, whls = {"deadb33f": "baz"}, ), "foo": struct( - index_url = "extra/foo/", + index_url = "https://extra.com/foo/", sdists = {"deadbeef": "foo"}, sha256s_by_version = {"fizz": "foo"}, whls = {"deadb33f": "foo"}, @@ -89,85 +92,25 @@ def _test_simple(env): _tests.append(_test_simple) -def _test_fail(env): +def _test_index_overrides(env): calls = [] fails = [] - def read_simpleapi(ctx, url, versions, attr, cache, get_auth, block, allow_fail): - _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable - env.expect.that_bool(block).equals(False) - 
env.expect.that_bool(allow_fail).equals(True) - calls.append(url) - if "foo" in url: + def read_simpleapi(ctx, *, url, versions, attr, cache, get_auth, block, parse_index): + if parse_index: return struct( - output = "", - success = False, - ) - if "bar" in url: - return struct( - output = "", - success = False, - ) - else: - return struct( - output = struct( - sdists = {}, - whls = {}, - sha256s_by_version = {}, - ), success = True, + output = { + "Baz": "/baz/", # let's test normalization + "bar": "/bar/", + "foo": "/foo-should-fail/", + } if "main" in url else { + "foo": "/foo/", + }, ) - simpleapi_download( - ctx = struct( - getenv = {}.get, - report_progress = lambda _: None, - ), - attr = struct( - index_url_overrides = {}, - index_url = "main", - extra_index_urls = ["extra"], - sources = {"bar": None, "baz": None, "foo": None}, - envsubst = [], - ), - cache = pypi_cache(), - parallel_download = True, - read_simpleapi = read_simpleapi, - _fail = fails.append, - ) - - env.expect.that_collection(fails).contains_exactly([ - """ -Failed to download metadata of the following packages from urls: -{ - "bar": ["main", "extra"], - "foo": ["main", "extra"], -} - -If you would like to skip downloading metadata for these packages please add 'simpleapi_skip=[ - "bar", - "foo", -]' to your 'pip.parse' call. 
-""", - ]) - env.expect.that_collection(calls).contains_exactly([ - "main/foo/", - "main/bar/", - "main/baz/", - "extra/foo/", - "extra/bar/", - ]) - -_tests.append(_test_fail) - -def _test_allow_fail_single_index(env): - calls = [] - fails = [] - - def read_simpleapi(ctx, *, url, versions, attr, cache, get_auth, block, allow_fail): _ = ctx, attr, cache, get_auth, versions # buildifier: disable=unused-variable env.expect.that_bool(block).equals(False) - env.expect.that_bool(allow_fail).equals(False) calls.append(url) return struct( output = struct( @@ -185,9 +128,9 @@ def _test_allow_fail_single_index(env): ), attr = struct( index_url_overrides = { - "foo": "extra", + "foo": "https://extra.com", }, - index_url = "main", + index_url = "https://main.com", extra_index_urls = [], sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], @@ -200,32 +143,32 @@ def _test_allow_fail_single_index(env): env.expect.that_collection(fails).contains_exactly([]) env.expect.that_collection(calls).contains_exactly([ - "main/bar/", - "main/baz/", - "extra/foo/", + "https://main.com/bar/", + "https://main.com/baz/", + "https://extra.com/foo/", ]) env.expect.that_dict(contents).contains_exactly({ "bar": struct( - index_url = "main/bar/", + index_url = "https://main.com/bar/", sdists = {"deadbeef": "bar"}, sha256s_by_version = {"fizz": "bar"}, whls = {"deadb33f": "bar"}, ), "baz": struct( - index_url = "main/baz/", + index_url = "https://main.com/baz/", sdists = {"deadbeef": "baz"}, sha256s_by_version = {"fizz": "baz"}, whls = {"deadb33f": "baz"}, ), "foo": struct( - index_url = "extra/foo/", + index_url = "https://extra.com/foo/", sdists = {"deadbeef": "foo"}, sha256s_by_version = {"fizz": "foo"}, whls = {"deadb33f": "foo"}, ), }) -_tests.append(_test_allow_fail_single_index) +_tests.append(_test_index_overrides) def _test_download_url(env): downloads = {} @@ -233,6 +176,17 @@ def _test_download_url(env): def download(url, output, **kwargs): _ = kwargs # buildifier: 
disable=unused-variable downloads[url[0]] = output + + if len(downloads) == 1: + return struct( + success = True, + output = """ + bar + baz + foo + """, + ) + return struct(success = True) simpleapi_download( From 3eb7adf3e592085f8e550db8c4ef64d5e0e12836 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 22 Mar 2026 00:35:04 +0900 Subject: [PATCH 08/12] handle envsubst when reading the index --- python/private/pypi/simpleapi_download.bzl | 12 ++-- python/private/pypi/urllib.bzl | 2 +- .../simpleapi_download_tests.bzl | 67 ++++++++++++++----- 3 files changed, 59 insertions(+), 22 deletions(-) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 55e11e6fd6..5a633e9915 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -163,14 +163,18 @@ def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simple # we should not use this index for the package continue - if not hasattr(result.output, "get"): - fail(result.output) - found = result.output.get(pkg) if not found: continue - found_on_index[pkg] = urllib.absolute_url(index_url, found) + # The spec says that we should be able to reach the thing via `/`, + # so let's extract that + found, _, part = found.rpartition("/") + if not part: + _, _, part = found.rpartition("/") + found_on_index[pkg] = urllib.strip_empty_path_segments( + "{}/{}/".format(index_url, part), + ) return found_on_index diff --git a/python/private/pypi/urllib.bzl b/python/private/pypi/urllib.bzl index ca6ded76b1..ea4cd32cc9 100644 --- a/python/private/pypi/urllib.bzl +++ b/python/private/pypi/urllib.bzl @@ -3,7 +3,7 @@ def _get_root_directory(url): scheme_end = url.find("://") if scheme_end == -1: - fail("Invalid URL format") + fail("Invalid URL format: '{}'".format(url)) scheme = url[:scheme_end] host_end = url.find("/", scheme_end + 3) diff --git 
a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 8d8a26dd4e..25494505ba 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -101,8 +101,8 @@ def _test_index_overrides(env): return struct( success = True, output = { - "Baz": "/baz/", # let's test normalization "bar": "/bar/", + "baz": "/baz/", "foo": "/foo-should-fail/", } if "main" in url else { "foo": "/foo/", @@ -172,21 +172,21 @@ _tests.append(_test_index_overrides) def _test_download_url(env): downloads = {} + reads = [ + # The first read is the index which seeds the downloads later + """ + bar + baz + foo + """, + "", + "", + "", + ] def download(url, output, **kwargs): _ = kwargs # buildifier: disable=unused-variable downloads[url[0]] = output - - if len(downloads) == 1: - return struct( - success = True, - output = """ - bar - baz - foo - """, - ) - return struct(success = True) simpleapi_download( @@ -194,14 +194,16 @@ def _test_download_url(env): getenv = {}.get, download = download, report_progress = lambda _: None, - read = lambda i: "contents of " + i, + # We will first add a download to the list, so this is a poor man's `next(foo)` + # implementation + read = lambda i: reads[len(downloads) - 1], path = lambda i: "path/for/" + i, ), attr = struct( index_url_overrides = {}, index_url = "https://example.com/main/simple/", extra_index_urls = [], - sources = {"bar": None, "baz": None, "foo": None}, + sources = {"bar": ["1.0"], "baz": ["1.0"], "foo": ["1.0"]}, envsubst = [], ), cache = pypi_cache(), @@ -210,6 +212,7 @@ def _test_download_url(env): ) env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/main/simple/": "path/for/https___example_com_main_simple.html", "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html", "https://example.com/main/simple/baz/": 
"path/for/https___example_com_main_simple_baz.html", "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html", @@ -219,6 +222,18 @@ _tests.append(_test_download_url) def _test_download_url_parallel(env): downloads = {} + reads = [ + # The first read is the index which seeds the downloads later + """ + bar + baz + foo + """, + "", + "", + "", + "", + ] def download(url, output, **kwargs): _ = kwargs # buildifier: disable=unused-variable @@ -230,13 +245,15 @@ def _test_download_url_parallel(env): getenv = {}.get, download = download, report_progress = lambda _: None, - read = lambda i: "contents of " + i, + # We will first add a download to the list, so this is a poor man's `next(foo)` + # implementation. We use 2 because we will enqueue 2 downloads in parallel. + read = lambda i: reads[len(downloads) - 2], path = lambda i: "path/for/" + i, ), attr = struct( index_url_overrides = {}, index_url = "https://example.com/main/simple/", - extra_index_urls = [], + extra_index_urls = ["https://example.com/extra/simple/"], sources = {"bar": None, "baz": None, "foo": None}, envsubst = [], ), @@ -246,6 +263,8 @@ def _test_download_url_parallel(env): ) env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/extra/simple/": "path/for/https___example_com_extra_simple.html", + "https://example.com/main/simple/": "path/for/https___example_com_main_simple.html", "https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html", "https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html", "https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html", @@ -255,6 +274,17 @@ _tests.append(_test_download_url_parallel) def _test_download_envsubst_url(env): downloads = {} + reads = [ + # The first read is the index which seeds the downloads later + """ + bar + baz + foo + """, + "", + "", + "", + ] def download(url, output, **kwargs): _ = kwargs # 
buildifier: disable=unused-variable @@ -266,7 +296,9 @@ def _test_download_envsubst_url(env): getenv = {"INDEX_URL": "https://example.com/main/simple/"}.get, download = download, report_progress = lambda _: None, - read = lambda i: "contents of " + i, + # We will first add a download to the list, so this is a poor man's `next(foo)` + # implementation + read = lambda i: reads[len(downloads) - 1], path = lambda i: "path/for/" + i, ), attr = struct( @@ -282,6 +314,7 @@ def _test_download_envsubst_url(env): ) env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/main/simple/": "path/for/~index_url~.html", "https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html", "https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html", "https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html", From fdafe7d6af50bc6bf3fab251be61facc46fbde96 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 22 Mar 2026 00:36:08 +0900 Subject: [PATCH 09/12] handle envsubst when reading the index --- python/private/pypi/simpleapi_download.bzl | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 5a633e9915..0f1ea9557d 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -169,12 +169,14 @@ def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simple # The spec says that we should be able to reach the thing via `/`, # so let's extract that + parts = [index_url] found, _, part = found.rpartition("/") + parts.append(part) if not part: _, _, part = found.rpartition("/") - found_on_index[pkg] = urllib.strip_empty_path_segments( - "{}/{}/".format(index_url, part), - ) + parts.append(part) + + found_on_index[pkg] = urllib.strip_empty_path_segments("/".join(parts)) return found_on_index From 
c3b68993789c817d2491a655c94bbe732d8399ae Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 22 Mar 2026 00:47:44 +0900 Subject: [PATCH 10/12] Ensure the URL construction for dist is robust enough --- python/private/pypi/simpleapi_download.bzl | 16 ++++++---------- .../simpleapi_download_tests.bzl | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 0f1ea9557d..e559196638 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -167,16 +167,12 @@ def _get_dist_urls(ctx, *, index_urls, index_url_overrides, sources, read_simple if not found: continue - # The spec says that we should be able to reach the thing via `/`, - # so let's extract that - parts = [index_url] - found, _, part = found.rpartition("/") - parts.append(part) - if not part: - _, _, part = found.rpartition("/") - parts.append(part) - - found_on_index[pkg] = urllib.strip_empty_path_segments("/".join(parts)) + # Ignore the URL here because we know how to construct it. 
+ + found_on_index[pkg] = urllib.strip_empty_path_segments("{}/{}/".format( + index_url, + pkg.replace("_", "-"), # Use the official normalization for URLs + )) return found_on_index diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 25494505ba..2ab4063952 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -101,8 +101,9 @@ def _test_index_overrides(env): return struct( success = True, output = { + # normalized + "ba_z": "/ba-z/", "bar": "/bar/", - "baz": "/baz/", "foo": "/foo-should-fail/", } if "main" in url else { "foo": "/foo/", @@ -132,7 +133,7 @@ def _test_index_overrides(env): }, index_url = "https://main.com", extra_index_urls = [], - sources = {"bar": None, "baz": None, "foo": None}, + sources = {"ba_z": None, "bar": None, "foo": None}, envsubst = [], ), cache = pypi_cache(), @@ -144,22 +145,22 @@ def _test_index_overrides(env): env.expect.that_collection(fails).contains_exactly([]) env.expect.that_collection(calls).contains_exactly([ "https://main.com/bar/", - "https://main.com/baz/", + "https://main.com/ba-z/", "https://extra.com/foo/", ]) env.expect.that_dict(contents).contains_exactly({ + "ba_z": struct( + index_url = "https://main.com/ba-z/", + sdists = {"deadbeef": "ba-z"}, + sha256s_by_version = {"fizz": "ba-z"}, + whls = {"deadb33f": "ba-z"}, + ), "bar": struct( index_url = "https://main.com/bar/", sdists = {"deadbeef": "bar"}, sha256s_by_version = {"fizz": "bar"}, whls = {"deadb33f": "bar"}, ), - "baz": struct( - index_url = "https://main.com/baz/", - sdists = {"deadbeef": "baz"}, - sha256s_by_version = {"fizz": "baz"}, - whls = {"deadb33f": "baz"}, - ), "foo": struct( index_url = "https://extra.com/foo/", sdists = {"deadbeef": "foo"}, From 26e56bb3bfa661594f7ea0413fd62feb0c13a142 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: 
Sun, 22 Mar 2026 01:01:06 +0900 Subject: [PATCH 11/12] fix more tests --- tests/pypi/hub_builder/hub_builder_tests.bzl | 33 ++++++++++++++------ 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index 637c7881c2..31a41f6af5 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -247,12 +247,19 @@ def _test_simple_extras_vs_no_extras(env): _tests.append(_test_simple_extras_vs_no_extras) def _test_simple_extras_vs_no_extras_simpleapi(env): - def mockread_simpleapi(*_, **__): + def mockread_simpleapi(*_, parse_index, **__): + if parse_index: + content = """\ + simple-0.0.1-py3-none-any.whl
+""" return struct( output = parse_simpleapi_html( - content = """\ - simple-0.0.1-py3-none-any.whl
-""", + content = content, + parse_index = parse_index, ), success = True, ) @@ -489,10 +496,13 @@ def _test_simple_with_markers(env): _tests.append(_test_simple_with_markers) def _test_torch_experimental_index_url(env): - def mockread_simpleapi(*_, **__): - return struct( - output = parse_simpleapi_html( - content = """\ + def mockread_simpleapi(*_, parse_index, **__): + if parse_index: + content = """\ + torch +""" + else: + content = """\ torch-2.4.1+cpu-cp310-cp310-linux_x86_64.whl
torch-2.4.1+cpu-cp310-cp310-win_amd64.whl
torch-2.4.1+cpu-cp311-cp311-linux_x86_64.whl
@@ -513,7 +523,12 @@ def _test_torch_experimental_index_url(env): torch-2.4.1-cp38-none-macosx_11_0_arm64.whl
torch-2.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
torch-2.4.1-cp39-none-macosx_11_0_arm64.whl
-""", +""" + + return struct( + output = parse_simpleapi_html( + content = content, + parse_index = parse_index, ), success = True, ) From 0f493abbce261a335f0c6590298fa6e87e7e7122 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Sun, 22 Mar 2026 01:11:41 +0900 Subject: [PATCH 12/12] doc: self review --- CHANGELOG.md | 9 +++++++-- python/private/pypi/BUILD.bazel | 3 +-- python/private/pypi/pypi_cache.bzl | 3 +++ python/private/pypi/simpleapi_download.bzl | 3 ++- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18be4def9c..ec9467e7a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,8 +67,13 @@ END_UNRELEASED_TEMPLATE Other changes: * (pypi) Update dependencies used for `compile_pip_requirements`, building sdists in the `whl_library` rule and fetching wheels using `pip`. -* (pypi) We will set `allow_fail` to `False` if the {attr}`experimental_index_url_overrides` is set - to a non-empty value. This means that failures will be no-longer cached in this particular case. +* (pypi) Before using the bazel downloader to fetch the PyPI package metadata + we will from now on fetch the lists of available packages on each index. The + used package mappings will be written as facts to the `MODULE.bazel.lock` file + on supported bazel versions and it should be done at most once. As a result, + per-package {obj}`experimental_index_url_overrides` is no longer needed if the index URLs are + passed to the `pip.parse` via `experimental_index_url` and `experimental_extra_index_urls`. 
+ Fixes ([#3260](https://github.com/bazel-contrib/rules_python/issues/3260) and [#2632](https://github.com/bazel-contrib/rules_python/issues/2632)) diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 6b4822333c..869be4705a 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -244,6 +244,7 @@ bzl_library( srcs = ["parse_simpleapi_html.bzl"], deps = [ ":version_from_filename_bzl", + "//python/private:normalize_name_bzl", ], ) @@ -424,8 +425,6 @@ bzl_library( ":urllib_bzl", "//python/private:auth_bzl", "//python/private:normalize_name_bzl", - "//python/private:text_util_bzl", - "@bazel_features//:features", ], ) diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 2d2418c6ae..7b24102263 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -90,6 +90,9 @@ def _pypi_cache_get(self, key): # Could not get from in-memory, read from lockfile facts cached = self._facts.get(index_url, versions) else: + # We might be using something from memory that is not yet stored in facts (e.g. we processed + # the requirements.txt for one Python version and the deps got cached, but new python + # version means different deps, which may add extras). self._facts.setdefault(index_url, cached) return cached diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index e559196638..3551f18a0c 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -191,7 +191,8 @@ def _read_simpleapi(ctx, url, attr, cache, versions, parse_index, get_auth = Non cache: {type}`struct` the `pypi_cache` instance. versions: {type}`list[str] The versions that have been requested. get_auth: A function to get auth information. Used in tests. - parse_index: TODO + parse_index: {type}`bool` Whether to parse the content as a root index page + (e.g.
`/simple/`) instead of a package-specific page. **download_kwargs: Any extra params to ctx.download. Note that output and auth will be passed for you.