From bec1251643434693752524f2d5353009cbd1ebb9 Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 27 Feb 2026 08:57:36 +0900 Subject: [PATCH 1/2] refactor(pypi): factor out a simple implementation of the PyPI cache We want to keep a `dict`-like interface and later we would like to use the same interface to also do more things. I expect the cache key to become different in the future (i.e. include requested versions in it) so that we can check if we have the right versions in the MODULE.bazel.lock file or if we should actually call PyPI. Work towards #2731 --- python/private/pypi/BUILD.bazel | 6 +++ python/private/pypi/extension.bzl | 3 +- python/private/pypi/pypi_cache.bzl | 45 ++++++++++++++++++++++ python/private/pypi/simpleapi_download.bzl | 20 +++++----- 4 files changed, 63 insertions(+), 11 deletions(-) create mode 100644 python/private/pypi/pypi_cache.bzl diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 48a1837f36..ba667a2c4d 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -123,6 +123,7 @@ bzl_library( ":pep508_env_bzl", ":pip_repository_attrs_bzl", ":platform_bzl", + ":pypi_cache_bzl", ":simpleapi_download_bzl", ":whl_library_bzl", "//python/private:auth_bzl", @@ -355,6 +356,11 @@ bzl_library( srcs = ["platform.bzl"], ) +bzl_library( + name = "pypi_cache_bzl", + srcs = ["pypi_cache.bzl"], +) + bzl_library( name = "pypi_repo_utils_bzl", srcs = ["pypi_repo_utils.bzl"], diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 1ec9142bbb..5fded728bf 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -27,6 +27,7 @@ load(":parse_whl_name.bzl", "parse_whl_name") load(":pep508_env.bzl", "env") load(":pip_repository_attrs.bzl", "ATTRS") load(":platform.bzl", _plat = "platform") +load(":pypi_cache.bzl", "pypi_cache") load(":simpleapi_download.bzl", "simpleapi_download") 
load(":whl_library.bzl", "whl_library") @@ -224,7 +225,7 @@ You cannot use both the additive_build_content and additive_build_content_file a # dict[str repo, HubBuilder] # See `hub_builder.bzl%hub_builder()` for `HubBuilder` pip_hub_map = {} - simpleapi_cache = {} + simpleapi_cache = pypi_cache() for mod in module_ctx.modules: for pip_attr in mod.tags.parse: diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl new file mode 100644 index 0000000000..09c986af73 --- /dev/null +++ b/python/private/pypi/pypi_cache.bzl @@ -0,0 +1,45 @@ +"""A cache for the PyPI index contents evaluation. + +This is designed to work as follows: +- in-memory cache for results of PyPI index queries, so that we are not calling PyPI multiple times + for the same package for different hub repos. + +In the future the same will be used to: +- Store PyPI index query results as facts in the MODULE.bazel.lock file +""" + +def pypi_cache(): + """The cache for PyPI index queries.""" + self = struct( + store = {}, + ) + + return struct( + setdefault = lambda key, parsed_result: _pypi_cache_setdefault(self, key, parsed_result), + get = lambda key: _pypi_cache_get(self, key), + ) + +def _pypi_cache_setdefault(self, key, parsed_result): + """Store the value if not yet cached. + + Args: + self: {type}`struct` The self of this implementation. + key: {type}`str` The cache key, can be any string. + parsed_result: {type}`struct` The result of `parse_simpleapi_html` function. + + Returns: + The `parse_result`. + """ + return self.store.setdefault(key, parsed_result) + +def _pypi_cache_get(self, key): + """Return the parsed result from the cache. + + Args: + self: {type}`struct` The self of this implementation. + key: {type}`str` The cache key, can be any string. + + Returns: + The {type}`struct` or `None` based on if the result is in the cache or not. 
+ """ + return self.store.get(key) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 52ff02a178..5cb338a8fd 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -49,14 +49,13 @@ def simpleapi_download( * netrc: The netrc parameter for ctx.download, see http_file for docs. * auth_patterns: The auth_patterns parameter for ctx.download, see http_file for docs. - cache: A dictionary that can be used as a cache between calls during a - single evaluation of the extension. We use a dictionary as a cache - so that we can reuse calls to the simple API when evaluating the - extension. Using the canonical_id parameter of the module_ctx would - deposit the simple API responses to the bazel cache and that is - undesirable because additions to the PyPI index would not be - reflected when re-evaluating the extension unless we do - `bazel clean --expunge`. + cache: An opaque object used to cache call results. For implementation + see ./pypi_cache.bzl file. We use the canonical_id parameter for the key + value to ensure that distribution fetches from different indexes do not cause + cache collisions, because the index may return different locations from where + the files should be downloaded. We are not using the built-in cache in the + `download` function because the index may get updated at any time and we need + to be able to refresh the data. parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads. read_simpleapi: a function for reading and parsing of the SimpleAPI contents. Used in tests. 
@@ -197,8 +196,9 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs): )) cache_key = real_url - if cache_key in cache: - return struct(success = True, output = cache[cache_key]) + cached_result = cache.get(cache_key) + if cached_result: + return struct(success = True, output = cached_result) output_str = envsubst( url, From 25200ad7987d0ae6f85a5d5debeabf4f4f95216e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:33:53 +0900 Subject: [PATCH 2/2] update more tests and address comments --- python/private/pypi/hub_builder.bzl | 2 +- python/private/pypi/pypi_cache.bzl | 16 +++++++++------- tests/pypi/hub_builder/hub_builder_tests.bzl | 1 + .../simpleapi_download_tests.bzl | 11 ++++++----- 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index f0aa6a73bc..bf849c3f83 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -31,7 +31,7 @@ def hub_builder( simpleapi_download_fn, evaluate_markers_fn, logger, - simpleapi_cache = {}): + simpleapi_cache): """Return a hub builder instance Args: diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl index 09c986af73..a83f96bffd 100644 --- a/python/private/pypi/pypi_cache.bzl +++ b/python/private/pypi/pypi_cache.bzl @@ -8,17 +8,19 @@ In the future the same will be used to: - Store PyPI index query results as facts in the MODULE.bazel.lock file """ -def pypi_cache(): +def pypi_cache(store = None): """The cache for PyPI index queries.""" - self = struct( - store = {}, - ) - return struct( + # buildifier: disable=uninitialized + self = struct( + _store = store or {}, setdefault = lambda key, parsed_result: _pypi_cache_setdefault(self, key, parsed_result), get = lambda key: _pypi_cache_get(self, key), ) + # buildifier: enable=uninitialized + return self + def 
_pypi_cache_setdefault(self, key, parsed_result): """Store the value if not yet cached. @@ -30,7 +32,7 @@ def _pypi_cache_setdefault(self, key, parsed_result): Returns: The `parse_result`. """ - return self.store.setdefault(key, parsed_result) + return self._store.setdefault(key, parsed_result) def _pypi_cache_get(self, key): """Return the parsed result from the cache. @@ -42,4 +44,4 @@ def _pypi_cache_get(self, key): Returns: The {type}`struct` or `None` based on if the result is in the cache or not. """ - return self.store.get(key) + return self._store.get(key) diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index 03cefd13c5..c2809c11cb 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -99,6 +99,7 @@ def hub_builder( "unit-test", printer = log_printer, ), + simpleapi_cache = {}, ) self = struct( build = lambda: env.expect.that_struct( diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 8dc307235a..616c6c087f 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -15,6 +15,7 @@ "" load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private/pypi:pypi_cache.bzl", "pypi_cache") # buildifier: disable=bzl-visibility load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility _tests = [] @@ -52,7 +53,7 @@ def _test_simple(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = True, read_simpleapi = read_simpleapi, ) @@ -112,7 +113,7 @@ def _test_fail(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = True, read_simpleapi = read_simpleapi, _fail = fails.append, @@ 
-165,7 +166,7 @@ def _test_download_url(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = False, get_auth = lambda ctx, urls, ctx_attr: struct(), ) @@ -201,7 +202,7 @@ def _test_download_url_parallel(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = True, get_auth = lambda ctx, urls, ctx_attr: struct(), ) @@ -237,7 +238,7 @@ def _test_download_envsubst_url(env): sources = ["foo", "bar", "baz"], envsubst = ["INDEX_URL"], ), - cache = {}, + cache = pypi_cache(), parallel_download = False, get_auth = lambda ctx, urls, ctx_attr: struct(), )