diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index 48a1837f36..ba667a2c4d 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -123,6 +123,7 @@ bzl_library( ":pep508_env_bzl", ":pip_repository_attrs_bzl", ":platform_bzl", + ":pypi_cache_bzl", ":simpleapi_download_bzl", ":whl_library_bzl", "//python/private:auth_bzl", @@ -355,6 +356,11 @@ bzl_library( srcs = ["platform.bzl"], ) +bzl_library( + name = "pypi_cache_bzl", + srcs = ["pypi_cache.bzl"], +) + bzl_library( name = "pypi_repo_utils_bzl", srcs = ["pypi_repo_utils.bzl"], diff --git a/python/private/pypi/extension.bzl b/python/private/pypi/extension.bzl index 1ec9142bbb..5fded728bf 100644 --- a/python/private/pypi/extension.bzl +++ b/python/private/pypi/extension.bzl @@ -27,6 +27,7 @@ load(":parse_whl_name.bzl", "parse_whl_name") load(":pep508_env.bzl", "env") load(":pip_repository_attrs.bzl", "ATTRS") load(":platform.bzl", _plat = "platform") +load(":pypi_cache.bzl", "pypi_cache") load(":simpleapi_download.bzl", "simpleapi_download") load(":whl_library.bzl", "whl_library") @@ -224,7 +225,7 @@ You cannot use both the additive_build_content and additive_build_content_file a # dict[str repo, HubBuilder] # See `hub_builder.bzl%hub_builder()` for `HubBuilder` pip_hub_map = {} - simpleapi_cache = {} + simpleapi_cache = pypi_cache() for mod in module_ctx.modules: for pip_attr in mod.tags.parse: diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index f0aa6a73bc..bf849c3f83 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -31,7 +31,7 @@ def hub_builder( simpleapi_download_fn, evaluate_markers_fn, logger, - simpleapi_cache = {}): + simpleapi_cache): """Return a hub builder instance Args: diff --git a/python/private/pypi/pypi_cache.bzl b/python/private/pypi/pypi_cache.bzl new file mode 100644 index 0000000000..a83f96bffd --- /dev/null +++ b/python/private/pypi/pypi_cache.bzl 
@@ -0,0 +1,55 @@ +"""A cache for the PyPI index contents evaluation. + +This is designed to work as follows: +- in-memory cache for results of PyPI index queries, so that we are not calling PyPI multiple times + for the same package for different hub repos. + +In the future the same will be used to: +- Store PyPI index query results as facts in the MODULE.bazel.lock file +""" + +def pypi_cache(store = None): + """The cache for PyPI index queries. + + Args: + store: {type}`dict | None` The dictionary backing the cache. A new empty dictionary is created when `None` is passed. + + Returns: + A {type}`struct` with the `setdefault` and `get` methods. + """ + + # buildifier: disable=uninitialized + self = struct( + # Fall back to a new dictionary only for `None`, so that an empty dictionary passed by the caller is still used as the store. + _store = {} if store == None else store, + setdefault = lambda key, parsed_result: _pypi_cache_setdefault(self, key, parsed_result), + get = lambda key: _pypi_cache_get(self, key), + ) + + # buildifier: enable=uninitialized + return self + +def _pypi_cache_setdefault(self, key, parsed_result): + """Store the value if not yet cached. + + Args: + self: {type}`struct` The self of this implementation. + key: {type}`str` The cache key, can be any string. + parsed_result: {type}`struct` The result of `parse_simpleapi_html` function. + + Returns: + The value cached for `key`: the previously stored one if present, otherwise `parsed_result`. + """ + return self._store.setdefault(key, parsed_result) + +def _pypi_cache_get(self, key): + """Return the parsed result from the cache. + + Args: + self: {type}`struct` The self of this implementation. + key: {type}`str` The cache key, can be any string. + + Returns: + The {type}`struct` or `None` based on if the result is in the cache or not. + """ + return self._store.get(key) diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 52ff02a178..5cb338a8fd 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -49,14 +49,13 @@ def simpleapi_download( * netrc: The netrc parameter for ctx.download, see http_file for docs. * auth_patterns: The auth_patterns parameter for ctx.download, see http_file for docs. - cache: A dictionary that can be used as a cache between calls during a - single evaluation of the extension. 
We use a dictionary as a cache - so that we can reuse calls to the simple API when evaluating the - extension. Using the canonical_id parameter of the module_ctx would - deposit the simple API responses to the bazel cache and that is - undesirable because additions to the PyPI index would not be - reflected when re-evaluating the extension unless we do - `bazel clean --expunge`. + cache: An opaque object used to cache call results. For implementation + see ./pypi_cache.bzl file. We use the canonical_id parameter for the key + value to ensure that distribution fetches from different indexes do not cause + cache collisions, because the index may return different locations from where + the files should be downloaded. We are not using the built-in cache in the + `download` function because the index may get updated at any time and we need + to be able to refresh the data. parallel_download: A boolean to enable usage of bazel 7.1 non-blocking downloads. read_simpleapi: a function for reading and parsing of the SimpleAPI contents. Used in tests. 
@@ -197,8 +196,9 @@ def _read_simpleapi(ctx, url, attr, cache, get_auth = None, **download_kwargs): )) cache_key = real_url - if cache_key in cache: - return struct(success = True, output = cache[cache_key]) + cached_result = cache.get(cache_key) + if cached_result: + return struct(success = True, output = cached_result) output_str = envsubst( url, diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index 03cefd13c5..c2809c11cb 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -99,6 +99,7 @@ def hub_builder( "unit-test", printer = log_printer, ), + simpleapi_cache = {}, ) self = struct( build = lambda: env.expect.that_struct( diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 8dc307235a..616c6c087f 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -15,6 +15,7 @@ "" load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private/pypi:pypi_cache.bzl", "pypi_cache") # buildifier: disable=bzl-visibility load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility _tests = [] @@ -52,7 +53,7 @@ def _test_simple(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = True, read_simpleapi = read_simpleapi, ) @@ -112,7 +113,7 @@ def _test_fail(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = True, read_simpleapi = read_simpleapi, _fail = fails.append, @@ -165,7 +166,7 @@ def _test_download_url(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = False, get_auth = lambda ctx, urls, ctx_attr: struct(), ) @@ 
-201,7 +202,7 @@ def _test_download_url_parallel(env): sources = ["foo", "bar", "baz"], envsubst = [], ), - cache = {}, + cache = pypi_cache(), parallel_download = True, get_auth = lambda ctx, urls, ctx_attr: struct(), ) @@ -237,7 +238,7 @@ def _test_download_envsubst_url(env): sources = ["foo", "bar", "baz"], envsubst = ["INDEX_URL"], ), - cache = {}, + cache = pypi_cache(), parallel_download = False, get_auth = lambda ctx, urls, ctx_attr: struct(), )