Skip to content

Commit

Permalink
feat: add py_image_layer (#402)
Browse files Browse the repository at this point in the history
Replaces #349

### Changes are visible to end-users: yes

- Searched for relevant documentation and updated as needed: yes
- Breaking change (forces users to change their own code or config): no
- Suggested release notes appear below: yes

Add `py_image_layer` macro for creating py container images.

### Test plan

I will add a test in a follow-up.
  • Loading branch information
thesayyn authored Oct 4, 2024
1 parent fb114ab commit c155afa
Show file tree
Hide file tree
Showing 9 changed files with 264 additions and 6 deletions.
2 changes: 1 addition & 1 deletion MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ module(

# Lower-bound versions of direct dependencies.
# When bumping, add a comment explaining what's required from the newer release.
bazel_dep(name = "aspect_bazel_lib", version = "1.40.0")
bazel_dep(name = "aspect_bazel_lib", version = "2.9.1") # py_image_layer requires 2.x for the `tar` rule.
bazel_dep(name = "bazel_skylib", version = "1.4.2")
bazel_dep(name = "rules_python", version = "0.29.0")
bazel_dep(name = "platforms", version = "0.0.7")
Expand Down
5 changes: 5 additions & 0 deletions docs/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ stardoc_with_diff_test(
bzl_library_target = "//py/private:py_pex_binary",
)

# API docs for the py_image_layer macro, generated from the bzl_library
# target //py/private:py_image_layer.
# NOTE(review): presumably diff-tested against the checked-in
# docs/py_image_layer.md, like the sibling targets in this file - confirm.
stardoc_with_diff_test(
name = "py_image_layer",
bzl_library_target = "//py/private:py_image_layer",
)

stardoc_with_diff_test(
name = "venv",
bzl_library_target = "//py/private:py_venv",
Expand Down
81 changes: 81 additions & 0 deletions docs/py_image_layer.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions py/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ bzl_library(
"//py/private:py_wheel",
"//py/private:virtual",
"//py/private:py_pex_binary",
"//py/private:py_image_layer",
"@aspect_bazel_lib//lib:utils",
],
)
5 changes: 4 additions & 1 deletion py/defs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,13 @@ python.toolchain(python_version = "3.9", is_default = True)
load("@aspect_bazel_lib//lib:utils.bzl", "propagate_common_rule_attributes")
load("//py/private:py_binary.bzl", _py_binary = "py_binary", _py_test = "py_test")
load("//py/private:py_executable.bzl", "determine_main")
load("//py/private:py_image_layer.bzl", _py_image_layer = "py_image_layer")
load("//py/private:py_library.bzl", _py_library = "py_library")
load("//py/private:py_pex_binary.bzl", _py_pex_binary = "py_pex_binary")
load("//py/private:py_pytest_main.bzl", _py_pytest_main = "py_pytest_main")
load("//py/private:py_unpacked_wheel.bzl", _py_unpacked_wheel = "py_unpacked_wheel")
load("//py/private:virtual.bzl", _resolutions = "resolutions")
load("//py/private:py_venv.bzl", _py_venv = "py_venv")
load("//py/private:virtual.bzl", _resolutions = "resolutions")

py_pex_binary = _py_pex_binary
py_pytest_main = _py_pytest_main
Expand All @@ -54,6 +55,8 @@ py_test_rule = _py_test
py_library = _py_library
py_unpacked_wheel = _py_unpacked_wheel

py_image_layer = _py_image_layer

resolutions = _resolutions

def _py_binary_or_test(name, rule, srcs, main, deps = [], resolutions = {}, **kwargs):
Expand Down
8 changes: 8 additions & 0 deletions py/private/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ exports_files(
visibility = ["//docs:__pkg__"],
)

# Starlark sources of the py_image_layer macro, packaged as a bzl_library.
# The dependency on bazel-lib's tar library is needed because
# py_image_layer.bzl loads `mtree_spec` and `tar` from
# "@aspect_bazel_lib//lib:tar.bzl".
bzl_library(
name = "py_image_layer",
srcs = ["py_image_layer.bzl"],
deps = [
"@aspect_bazel_lib//lib:tar",
],
)

bzl_library(
name = "py_binary",
srcs = ["py_binary.bzl"],
Expand Down
156 changes: 156 additions & 0 deletions py/private/py_image_layer.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
"""py_image_layer macro for creating multiple layers from a py_binary
> [!WARNING]
> This macro is EXPERIMENTAL and is not subject to our SemVer guarantees.
A py_binary that uses `torch` and `numpy` can use the following layer groups:
```
load("@rules_oci//oci:defs.bzl", "oci_image")
load("@aspect_rules_py//py:defs.bzl", "py_image_layer", "py_binary")
py_binary(
name = "my_app_bin",
deps = [
"@pip_deps//numpy",
"@pip_deps//torch"
]
)
oci_image(
tars = py_image_layer(
name = "my_app",
py_binary = ":my_app_bin",
layer_groups = {
"torch": "pip_deps_torch.*",
"numpy": "pip_deps_numpy.*",
}
)
)
```
"""

load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar")

# Regex selecting files that live under a `site-packages` directory inside a
# runfiles tree, i.e. third-party packages coming from pip-like external
# repositories.
_SITE_PACKAGES_REGEX = "\\.runfiles/.*/site-packages"

# Regex selecting files of an external repository whose runfiles directory name
# begins with "python" and contains a "-" (a hermetic interpreter repository).
# It is meant to leave first-party paths alone, e.g.
#   `/hello_world/hello_world_bin.runfiles/_main/python_app`
# NOTE(review): a bzlmod canonical name such as
#   `.../hello_world_bin.runfiles/rules_python~0.21.0~python~python3_9_aarch64-unknown-linux-gnu/bin/python3`
# does not have "python" directly after `.runfiles/`, so it looks like this
# pattern would not match it - confirm whether a leading `.*` is wanted.
_INTERPRETER_REGEX = "\\.runfiles/python.*-.*/"

# Layer groups offered by default: layer name -> regex over file paths.
# Order is significant: "packages" is listed (and therefore tried) before
# "interpreter".
default_layer_groups = {
    "packages": _SITE_PACKAGES_REGEX,
    "interpreter": _INTERPRETER_REGEX,
}

def _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs):
    """Declares a genrule that splits the `<name>.manifest` mtree into one spec per layer group.

    The genrule runs `awk` over the input manifest. Every entry (line) is
    prefixed with `.<root>` and then appended to exactly one output file:
    `<name>.<group>.manifest.spec` for the first group in `groups` whose regex
    matches the entry's first field, or `<name>.default.manifest.spec` when no
    group matches. Entries whose first field matches `.whl` are dropped.

    Args:
        name: base name. The input is the `<name>.manifest` target, the genrule
            is named `_<name>_manifests` and the outputs are derived from it.
        root: path prefix prepended (after a leading ".") to every entry.
            `None` is treated as "/".
        groups: dict of group name -> awk regex, tried in iteration order.
        group_names: names of all spec files to produce. Expected to contain
            every key of `groups` plus "default", since those are the files the
            awk program writes to.
        **kwargs: forwarded to the underlying `genrule`.
    """

    # Callers may pass root = None to mean "not specified". Without this the
    # literal text "None" would be formatted into the awk program below and
    # every path would be re-rooted under ".None".
    if root == None:
        root = "/"

    # Start every output with the "#mtree" header. Because this runs in BEGIN,
    # each declared output exists even when no entry is routed to it.
    mtree_begin_blocks = "\n".join([
        'print "#mtree" >> "$(RULEDIR)/%s.%s.manifest.spec";' % (name, gn)
        for gn in group_names
    ])

    # When an mtree entry matches a layer group, it will be moved into the mtree
    # for that group. `next` stops processing of the entry, so the first
    # matching group wins.
    ifs = "\n".join([
        """\
    if ($$1 ~ "%s") {
        print $$0 >> "$(RULEDIR)/%s.%s.manifest.spec";
        next
    }""" % (regex, name, gn)
        for (gn, regex) in groups.items()
    ])

    # `$$` is the genrule escape for a literal `$`; `$<` expands to the single
    # source file. The awk program is wrapped in single quotes for the shell,
    # so it must not contain an apostrophe.
    # NOTE(review): ".whl" is an unanchored regex ("." matches any character);
    # kept as-is to preserve which entries are excluded.
    cmd = """\
awk < $< 'BEGIN {
    %s
}
{
    # Exclude .whl files from container images
    if ($$1 ~ ".whl") {
        next
    }
    # Move everything under the specified root
    sub(/^/, ".%s")
    # Match by regexes and write to the destination.
    %s
    # Every line that did not match the layer groups will go into the default layer.
    print $$0 >> "$(RULEDIR)/%s.default.manifest.spec"
}'
""" % (mtree_begin_blocks, root, ifs, name)

    native.genrule(
        name = "_{}_manifests".format(name),
        srcs = [name + ".manifest"],
        outs = [
            "{}.{}.manifest.spec".format(name, group_name)
            for group_name in group_names
        ],
        cmd = cmd,
        **kwargs
    )


def py_image_layer(name, py_binary, root = None, layer_groups = {}, compress = "gzip", tar_args = ["--options", "gzip:!timestamp"], **kwargs):
    """Produce a separate tar output for each layer of a python app

    > Requires `awk` to be installed on the host machine/rbe runner.

    For better performance, it is recommended to split the output of a py_binary into multiple layers.
    This can be done by grouping files into layers based on their path by using the `layer_groups` attribute.

    The matching order for layer groups is as follows:
        1. `layer_groups` are checked first.
        2. If no match is found for `layer_groups`, the `default layer groups` are checked.
        3. Any remaining files are placed into the default layer.

    The default layer groups are:
    ```
    {
        "packages": "\\.runfiles/.*/site-packages", # contains third-party deps
        "interpreter": "\\.runfiles/python.*-.*/", # contains the python interpreter
    }
    ```

    A default layer group can be overridden by passing a group with the same
    name in `layer_groups`. The name `default` is reserved for the catch-all layer.

    Args:
        name: base name for targets
        py_binary: a py_binary target
        root: Path to where the layers should be rooted. Must start with `/`. If not specified, `/` is used.
        layer_groups: Additional layer groups to create. They are used to group files into layers based on their path. In the form of: ```{"<name>": "regex_to_match_against_file_paths"}```
        compress: Compression algorithm to use. Default is gzip. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule
        tar_args: Additional arguments to pass to the tar rule. Default is `["--options", "gzip:!timestamp"]`. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule
        **kwargs: attribute that apply to all targets expanded by the macro

    Returns:
        A list of labels for each layer.
    """

    # An unspecified root must not reach the manifest splitter as None: it is
    # formatted into an awk program and would show up as the text "None".
    if root == None:
        root = "/"
    elif not root.startswith("/"):
        fail("root path must start with '/' but got '{root}', expected '/{root}'".format(root = root))

    # "default" names the catch-all layer; a user group with the same name
    # would declare the same output file twice.
    if "default" in layer_groups:
        fail("layer group name 'default' is reserved for files that match no other layer group")

    # Produce the manifest for a tar file of our py_binary, but don't tar it up yet, so we can split
    # into fine-grained layers for better pull, push and remote cache performance.
    mtree_spec(
        name = name + ".manifest",
        srcs = [py_binary],
        **kwargs
    )

    # User-provided groups come first so that they are matched first; default
    # groups are appended afterwards unless the user redefined them.
    groups = dict(layer_groups)
    for default_name, default_regex in default_layer_groups.items():
        if default_name not in groups:
            groups[default_name] = default_regex
    group_names = list(groups.keys()) + ["default"]

    _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs)

    # Finally create layers using the tar rule
    result = []
    for group_name in group_names:
        tar_target = "_{}_{}".format(name, group_name)
        tar(
            name = tar_target,
            srcs = [py_binary],
            mtree = "{}.{}.manifest.spec".format(name, group_name),
            compress = compress,
            args = tar_args,
            **kwargs
        )
        result.append(tar_target)

    return result
7 changes: 4 additions & 3 deletions py/repositories.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ def rules_py_dependencies():
url = "https://github.com/bazelbuild/bazel-skylib/archive/refs/tags/1.5.0.tar.gz",
)

# py_image_layer requires 2.x for the `tar` rule.
http_archive(
name = "aspect_bazel_lib",
sha256 = "6e6f8ac3c601d6df25810cd51e51d85831e3437e873b152c5c4ecd3b96964bc8",
strip_prefix = "bazel-lib-1.42.3",
url = "https://github.com/aspect-build/bazel-lib/archive/refs/tags/v1.42.3.tar.gz",
sha256 = "f93d386d8d0b0149031175e81df42a488be4267c3ca2249ba5321c23c60bc1f0",
strip_prefix = "bazel-lib-2.9.1",
url = "https://github.com/bazel-contrib/bazel-lib/releases/download/v2.9.1/bazel-lib-v2.9.1.tar.gz",
)

http_archive(
Expand Down
5 changes: 4 additions & 1 deletion py/toolchains.bzl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
"""Declare toolchains"""

load("@aspect_bazel_lib//lib:repositories.bzl", "register_tar_toolchains")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file")
load("//py/private/toolchain:autodetecting.bzl", _register_autodetecting_python_toolchain = "register_autodetecting_python_toolchain")
load("//py/private/toolchain:repo.bzl", "prerelease_toolchains_repo", "toolchains_repo")
load("//py/private/toolchain:tools.bzl", "TOOLCHAIN_PLATFORMS", "prebuilt_tool_repo")
load("//tools:version.bzl", "IS_PRERELEASE")


register_autodetecting_python_toolchain = _register_autodetecting_python_toolchain

DEFAULT_TOOLS_REPOSITORY = "rules_py_tools"
Expand All @@ -19,6 +19,9 @@ def rules_py_toolchains(name = DEFAULT_TOOLS_REPOSITORY, register = True, is_pre
register: whether to call the register_toolchains, should be True for WORKSPACE and False for bzlmod.
is_prerelease: True iff there are no pre-built tool binaries for this version of rules_py
"""

register_tar_toolchains(register = register)

if is_prerelease:
prerelease_toolchains_repo(name = name)
if register:
Expand Down

0 comments on commit c155afa

Please sign in to comment.