From 7a9e4b26beb807e2cb606be170683e22627d6dc7 Mon Sep 17 00:00:00 2001 From: Sahin Yort Date: Wed, 4 Sep 2024 12:39:35 -0700 Subject: [PATCH] feat: implement --build_python_zip pex (#324) ### Type of change - New feature or functionality (#236) ### Test plan - New test cases added --------- Co-authored-by: Matt Mackay --- MODULE.bazel | 1 + docs/rules.md | 22 ++++ examples/py_pex_binary/BUILD.bazel | 22 ++++ examples/py_pex_binary/data.txt | 1 + examples/py_pex_binary/say.py | 28 ++++ py/BUILD.bazel | 1 + py/defs.bzl | 2 + py/private/BUILD.bazel | 10 ++ py/private/py_pex_binary.bzl | 159 +++++++++++++++++++++++ py/private/run.tmpl.sh | 5 +- py/toolchains.bzl | 9 ++ py/tools/pex/BUILD.bazel | 15 +++ py/tools/pex/main.py | 201 +++++++++++++++++++++++++++++ 13 files changed, 475 insertions(+), 1 deletion(-) create mode 100644 examples/py_pex_binary/BUILD.bazel create mode 100644 examples/py_pex_binary/data.txt create mode 100644 examples/py_pex_binary/say.py create mode 100644 py/private/py_pex_binary.bzl create mode 100644 py/tools/pex/BUILD.bazel create mode 100644 py/tools/pex/main.py diff --git a/MODULE.bazel b/MODULE.bazel index 6437df6f..453cbd2a 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -23,6 +23,7 @@ python.toolchain( tools = use_extension("//py:extensions.bzl", "py_tools") tools.rules_py_tools() use_repo(tools, "rules_py_tools") +use_repo(tools, "rules_py_pex_2_3_1") register_toolchains( "@rules_py_tools//:all", diff --git a/docs/rules.md b/docs/rules.md index 4eea2bdc..9078fd76 100644 --- a/docs/rules.md +++ b/docs/rules.md @@ -54,6 +54,28 @@ py_library(name, data< | virtual_deps | - | List of strings | optional | [] | + + +## py_pex_binary + +
+py_pex_binary(name, binary, inject_env, python_interpreter_constraints, python_shebang)
+
+ +Build a pex executable from a py_binary + +**ATTRIBUTES** + + +| Name | Description | Type | Mandatory | Default | +| :------------- | :------------- | :------------- | :------------- | :------------- | +| name | A unique name for this target. | Name | required | | +| binary | A py_binary target | Label | required | | +| inject_env | Environment variables to set when running the pex binary. | Dictionary: String -> String | optional | {} | +| python_interpreter_constraints | Python interpreter versions this PEX binary is compatible with. A list of semver strings. The placeholder strings {major}, {minor}, {patch} can be used for gathering version information from the hermetic python toolchain.

For example, to enforce the same interpreter version that Bazel uses, the following can be used.

starlark py_pex_binary(     python_interpreter_constraints = [       "CPython=={major}.{minor}.{patch}"     ] ) 
| List of strings | optional | ["CPython=={major}.{minor}.*"] | +| python_shebang | - | String | optional | "#!/usr/bin/env python3" | + + ## py_test_rule diff --git a/examples/py_pex_binary/BUILD.bazel b/examples/py_pex_binary/BUILD.bazel new file mode 100644 index 00000000..4be6474a --- /dev/null +++ b/examples/py_pex_binary/BUILD.bazel @@ -0,0 +1,22 @@ +load("//py:defs.bzl", "py_binary", "py_pex_binary") + +py_binary( + name = "binary", + srcs = ["say.py"], + data = ["data.txt"], + env = { + "TEST": "1" + }, + deps = [ + "@pypi_cowsay//:pkg", + "@bazel_tools//tools/python/runfiles", + ], +) + +py_pex_binary( + name = "py_pex_binary", + binary = ":binary", + inject_env = { + "TEST": "1" + } +) diff --git a/examples/py_pex_binary/data.txt b/examples/py_pex_binary/data.txt new file mode 100644 index 00000000..44f77409 --- /dev/null +++ b/examples/py_pex_binary/data.txt @@ -0,0 +1 @@ +Mooo! \ No newline at end of file diff --git a/examples/py_pex_binary/say.py b/examples/py_pex_binary/say.py new file mode 100644 index 00000000..8c5a3956 --- /dev/null +++ b/examples/py_pex_binary/say.py @@ -0,0 +1,28 @@ +import cowsay +import sys +import os +from bazel_tools.tools.python.runfiles import runfiles + +print("sys.path entries:") +for p in sys.path: + print(" ", p) + +print("") +print("os.environ entries:") +print(" runfiles dir:", os.environ.get("RUNFILES_DIR")) +print(" injected env:", os.environ.get("TEST")) + +print("") +print("dir info: ") +print(" current dir:", os.curdir) +print(" current dir (absolute):", os.path.abspath(os.curdir)) + + +r = runfiles.Create() +data_path = r.Rlocation("aspect_rules_py/examples/py_pex_binary/data.txt") + +print("") +print("runfiles lookup:") +print(" data.txt:", data_path) + +cowsay.cow(open(data_path).read()) \ No newline at end of file diff --git a/py/BUILD.bazel b/py/BUILD.bazel index 0aab4cbe..f90c3b2a 100644 --- a/py/BUILD.bazel +++ b/py/BUILD.bazel @@ -37,6 +37,7 @@ bzl_library( "//py/private:py_venv", "//py/private:py_wheel", 
"//py/private:virtual", + "//py/private:py_pex_binary", "@aspect_bazel_lib//lib:utils", ], ) diff --git a/py/defs.bzl b/py/defs.bzl index a5e5a87e..6fabf75a 100644 --- a/py/defs.bzl +++ b/py/defs.bzl @@ -4,11 +4,13 @@ load("@aspect_bazel_lib//lib:utils.bzl", "propagate_common_rule_attributes") load("//py/private:py_binary.bzl", _py_binary = "py_binary", _py_test = "py_test") load("//py/private:py_executable.bzl", "determine_main") load("//py/private:py_library.bzl", _py_library = "py_library") +load("//py/private:py_pex_binary.bzl", _py_pex_binary = "py_pex_binary") load("//py/private:py_pytest_main.bzl", _py_pytest_main = "py_pytest_main") load("//py/private:py_unpacked_wheel.bzl", _py_unpacked_wheel = "py_unpacked_wheel") load("//py/private:virtual.bzl", _resolutions = "resolutions") load("//py/private:py_venv.bzl", _py_venv = "py_venv") +py_pex_binary = _py_pex_binary py_pytest_main = _py_pytest_main py_venv = _py_venv diff --git a/py/private/BUILD.bazel b/py/private/BUILD.bazel index 568af5a7..7cd39349 100644 --- a/py/private/BUILD.bazel +++ b/py/private/BUILD.bazel @@ -106,6 +106,16 @@ bzl_library( visibility = ["//py:__subpackages__"], ) +bzl_library( + name = "py_pex_binary", + srcs = ["py_pex_binary.bzl"], + visibility = ["//py:__subpackages__"], + deps = [ + ":py_semantics", + "//py/private/toolchain:types", + ], +) + bzl_library( name = "virtual", srcs = ["virtual.bzl"], diff --git a/py/private/py_pex_binary.bzl b/py/private/py_pex_binary.bzl new file mode 100644 index 00000000..faf80814 --- /dev/null +++ b/py/private/py_pex_binary.bzl @@ -0,0 +1,159 @@ +"Create python zip file https://peps.python.org/pep-0441/ (PEX)" + +load("@rules_python//python:defs.bzl", "PyInfo") +load("//py/private:py_semantics.bzl", _py_semantics = "semantics") +load("//py/private/toolchain:types.bzl", "PY_TOOLCHAIN") + +def _runfiles_path(file, workspace): + if file.short_path.startswith("../"): + return file.short_path[3:] + else: + return workspace + "/" + file.short_path + 
+exclude_paths = [ + # following two lines will match paths we want to exclude in non-bzlmod setup + "toolchain", + "aspect_rules_py/py/tools/", + # these will match in bzlmod setup + "rules_python~~python~", + "aspect_rules_py~/py/tools/", + # these will match in bzlmod setup with --incompatible_use_plus_in_repo_names flag flipped. + "rules_python++python+", + "aspect_rules_py+/py/tools/" +] + +# determines if the given file is a `distinfo`, `dep` or a `source` +# this required to allow PEX to put files into different places. +# +# --dep: into `/.deps/` +# --distinfo: is only used for determining package metadata +# --source: into `//` +def _map_srcs(f, workspace): + dest_path = _runfiles_path(f, workspace) + + # We exclude files from hermetic python toolchain. + for exclude in exclude_paths: + if dest_path.find(exclude) != -1: + return [] + + site_packages_i = f.path.find("site-packages") + + # if path contains `site-packages` and there is only two path segments + # after it, it will be treated as third party dep. + # Here are some examples of path we expect and use and ones we ignore. + # + # Match: `external/rules_python~~pip~pypi_39_rtoml/site-packages/rtoml-0.11.0.dist-info/INSTALLER` + # Reason: It has two `/` after first `site-packages` substring. + # + # No Match: `external/rules_python~~pip~pypi_39_rtoml/site-packages/rtoml-0.11.0/src/mod/parse.py` + # Reason: It has three `/` after first `site-packages` substring. + if site_packages_i != -1 and f.path.count("/", site_packages_i) == 2: + if f.path.find("dist-info", site_packages_i) != -1: + return ["--distinfo={}".format(f.dirname)] + return ["--dep={}".format(f.dirname)] + + # If the path does not have a `site-packages` in it, then put it into + # the standard runfiles tree. 
+ elif site_packages_i == -1: + return ["--source={}={}".format(f.path, dest_path)] + + return [] + +def _py_python_pex_impl(ctx): + py_toolchain = _py_semantics.resolve_toolchain(ctx) + + binary = ctx.attr.binary + runfiles = binary[DefaultInfo].data_runfiles + + output = ctx.actions.declare_file(ctx.attr.name + ".pex") + + args = ctx.actions.args() + + # Copy workspace name here to prevent ctx + # being transferred to the execution phase. + workspace_name = str(ctx.workspace_name) + + args.add_all( + ctx.attr.inject_env.items(), + map_each = lambda e: "--inject-env={}={}".format(e[0], e[1]), + # this is needed to allow passing a lambda to map_each + allow_closure = True, + ) + + args.add_all( + binary[PyInfo].imports, + format_each = "--sys-path=%s" + ) + + args.add_all( + runfiles.files, + map_each = lambda f: _map_srcs(f, workspace_name), + uniquify = True, + # this is needed to allow passing a lambda (with workspace_name) to map_each + allow_closure = True, + ) + args.add(binary[DefaultInfo].files_to_run.executable, format = "--executable=%s") + args.add(ctx.attr.python_shebang, format = "--python-shebang=%s") + args.add(py_toolchain.python, format = "--python=%s") + + py_version = py_toolchain.interpreter_version_info + args.add_all( + [ + constraint.format(major = py_version.major, minor = py_version.minor, patch = py_version.micro) + for constraint in ctx.attr.python_interpreter_constraints + ], + format_each = "--python-version-constraint=%s" + ) + args.add(output, format = "--output-file=%s") + + ctx.actions.run( + executable = ctx.executable._pex, + inputs = runfiles.files, + arguments = [args], + outputs = [output], + mnemonic = "PyPex", + progress_message = "Building PEX binary %{label}", + ) + + return [ + DefaultInfo(files = depset([output]), executable = output) + ] + + +_attrs = dict({ + "binary": attr.label(executable = True, cfg = "target", mandatory = True, doc = "A py_binary target"), + "inject_env": attr.string_dict( + doc = "Environment 
variables to set when running the pex binary.", + default = {}, + ), + "python_shebang": attr.string(default = "#!/usr/bin/env python3"), + "python_interpreter_constraints": attr.string_list( + default = ["CPython=={major}.{minor}.*"], + doc = """\ +Python interpreter versions this PEX binary is compatible with. A list of semver strings. +The placeholder strings `{major}`, `{minor}`, `{patch}` can be used for gathering version +information from the hermetic python toolchain. + +For example, to enforce the same interpreter version that Bazel uses, the following can be used. + +```starlark +py_pex_binary( + python_interpreter_constraints = [ + "CPython=={major}.{minor}.{patch}" + ] +) +``` +"""), + "_pex": attr.label(executable = True, cfg = "exec", default = "//py/tools/pex") +}) + + +py_pex_binary = rule( + doc = "Build a pex executable from a py_binary", + implementation = _py_python_pex_impl, + attrs = _attrs, + toolchains = [ + PY_TOOLCHAIN + ], + executable = True, +) \ No newline at end of file diff --git a/py/private/run.tmpl.sh b/py/private/run.tmpl.sh index 1841937c..885fc885 100644 --- a/py/private/run.tmpl.sh +++ b/py/private/run.tmpl.sh @@ -2,6 +2,9 @@ # NB: we don't use a path from @bazel_tools//tools/sh:toolchain_type because that's configured for the exec # configuration, while this script executes in the target configuration at runtime. +# This is a special comment for py_pex_binary to find the python entrypoint. 
+# __PEX_PY_BINARY_ENTRYPOINT__ {{ENTRYPOINT}} + {{BASH_RLOCATION_FN}} runfiles_export_envvars @@ -55,4 +58,4 @@ if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then hash -r 2> /dev/null fi -exec "{{EXEC_PYTHON_BIN}}" {{INTERPRETER_FLAGS}} "$(rlocation {{ENTRYPOINT}})" "$@" +exec "{{EXEC_PYTHON_BIN}}" {{INTERPRETER_FLAGS}} "$(rlocation {{ENTRYPOINT}})" "$@" \ No newline at end of file diff --git a/py/toolchains.bzl b/py/toolchains.bzl index b68a400a..ecd60532 100644 --- a/py/toolchains.bzl +++ b/py/toolchains.bzl @@ -1,5 +1,6 @@ """Declare toolchains""" +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_file") load("//py/private/toolchain:autodetecting.bzl", _register_autodetecting_python_toolchain = "register_autodetecting_python_toolchain") load("//py/private/toolchain:repo.bzl", "prerelease_toolchains_repo", "toolchains_repo") load("//py/private/toolchain:tools.bzl", "TOOLCHAIN_PLATFORMS", "prebuilt_tool_repo") @@ -32,3 +33,11 @@ def rules_py_toolchains(name = DEFAULT_TOOLS_REPOSITORY, register = True, is_pre if register: native.register_toolchains("@{}//:all".format(name)) + + + http_file( + name = "rules_py_pex_2_3_1", + urls = ["https://files.pythonhosted.org/packages/e7/d0/fbda2a4d41d62d86ce53f5ae4fbaaee8c34070f75bb7ca009090510ae874/pex-2.3.1-py2.py3-none-any.whl"], + sha256 = "64692a5bf6f298403aab930d22f0d836ae4736c5bc820e262e9092fe8c56f830", + downloaded_file_path = "pex-2.3.1-py2.py3-none-any.whl", + ) \ No newline at end of file diff --git a/py/tools/pex/BUILD.bazel b/py/tools/pex/BUILD.bazel new file mode 100644 index 00000000..003d4343 --- /dev/null +++ b/py/tools/pex/BUILD.bazel @@ -0,0 +1,15 @@ +load("//py:defs.bzl", "py_binary", "py_unpacked_wheel") + +py_unpacked_wheel( + name = "pex_unpacked", + src = "@rules_py_pex_2_3_1//file", + py_package_name = "pex" +) + +py_binary( + name = "pex", + srcs = ["main.py"], + main = "main.py", + deps = [":pex_unpacked"], + visibility = ["//visibility:public"] +) \ No newline at end of file diff --git 
a/py/tools/pex/main.py b/py/tools/pex/main.py
new file mode 100644
index 00000000..45981091
--- /dev/null
+++ b/py/tools/pex/main.py
@@ -0,0 +1,201 @@
+# Unfortunately there is no way to stop pex from writing to a PEX_ROOT during build.
+# Closest thing seems to be creating a tmp folder and deleting it after.
+# pex cli does the same here;
+# https://github.com/pex-tool/pex/blob/252459bdd879fc1e3446a6221571875d46fad1bd/pex/commands/command.py#L362-L382
+import os
+from pex.common import safe_mkdtemp, safe_rmtree
+TMP_PEX_ROOT=safe_mkdtemp()
+os.environ["PEX_ROOT"] = TMP_PEX_ROOT
+
+import sys
+from pex.pex_builder import Check,PEXBuilder
+from pex.interpreter import PythonInterpreter
+from pex.interpreter_constraints import InterpreterConstraint
+from pex.layout import Layout
+from pex.dist_metadata import Distribution
+from argparse import Action, ArgumentError, ArgumentParser  # ArgumentError is raised by InjectEnvAction
+
+class InjectEnvAction(Action):
+    def __call__(self, parser, namespace, value, option_str=None):
+        components = value.split("=", 1)
+        if len(components) != 2:
+            raise ArgumentError(
+                self,
+                "Environment variable values must be of the form `name=value`. "
+                "Given: {value}".format(value=value),
+            )
+        self.default.append(tuple(components))
+
+parser = ArgumentParser()
+
+parser.add_argument(
+    "-o",
+    "--output-file",
+    dest="pex_name",
+    default=None,
+    help="The name of the generated .pex file: Omitting this will run PEX "
+    "immediately and not save it to a file.",
+)
+
+parser.add_argument(
+    "--python",
+    dest="python",
+    required=True
+)
+
+
+parser.add_argument(
+    "--python-version-constraint",
+    dest="constraints",
+    default=[],
+    action="append"
+)
+
+parser.add_argument(
+    "--python-shebang",
+    dest="python_shebang",
+    default=None,
+    required=True,
+    help="The exact shebang (#!...) line to add at the top of the PEX file minus the "
+    "#!. This overrides the default behavior, which picks an environment Python "
+    "interpreter compatible with the one used to build the PEX file.",
+)
+
+parser.add_argument(
+    "--executable",
+    dest="executable",
+    default=None,
+    metavar="EXECUTABLE",
+    help=(
+        "Set the entry point to an existing local python script. For example: "
+        '"pex --executable bin/my-python-script".'
+    ),
+)
+
+parser.add_argument(
+    "--dep", "--dependency",  # py_pex_binary.bzl passes --dep=<dir>; declare it instead of relying on argparse prefix matching
+    dest="dependencies",
+    default=[],
+    action="append",
+)
+
+parser.add_argument(
+    "--distinfo",
+    dest="distinfos",
+    default=[],
+    action="append",
+)
+
+parser.add_argument(
+    "--source",
+    dest="sources",
+    default=[],
+    action="append",
+)
+
+parser.add_argument(
+    "--inject-env",
+    dest="inject_env",
+    default=[],
+    action=InjectEnvAction,
+)
+
+parser.add_argument(
+    "--sys-path",
+    dest="sys_path",
+    default=[],
+    action="append",
+)
+
+options = parser.parse_args(args = sys.argv[1:])
+
+# Monkey patch bootstrap template to inject some templated environment variables.
+# Unfortunately we can't use `preamble` feature because it runs before any initialization code.
+import pex.pex_builder
+BE=pex.pex_builder.BOOTSTRAP_ENVIRONMENT
+
+INJECT_TEMPLATE=["os.environ['RUNFILES_DIR'] = __entry_point__"]
+
+for path in options.sys_path:
+    INJECT_TEMPLATE.append("sys.path.append(os.path.abspath(os.path.join(__entry_point__, '%s')))" % path)
+
+import_idx = BE.index("from pex.pex_bootstrapper import bootstrap_pex")
+# This is here to catch potential future bugs where pex package is updated here but the bootstrap
+# script was not checked again to see if we are still injecting values in the right place.
+assert import_idx == 3703, "Check bootstrap template monkey patching."
+
+pex.pex_builder.BOOTSTRAP_ENVIRONMENT = BE[:import_idx] + "\n".join(INJECT_TEMPLATE) + "\n" + BE[import_idx:]
+
+
+pex_builder = PEXBuilder(
+    interpreter=PythonInterpreter.from_binary(options.python),
+)
+
+
+MAGIC_COMMENT = "# __PEX_PY_BINARY_ENTRYPOINT__ "
+executable = None
+executable_was_set = False
+# set the entrypoint by looking at the generated launcher.
+with open(options.executable, "r") as contents:
+    line = contents.readline()
+    while line:
+        if line.startswith(MAGIC_COMMENT):
+            executable = line[len(MAGIC_COMMENT):].rstrip()
+            if executable:
+                break
+        line = contents.readline()
+
+    if not executable:
+        print("Could not determine the `main` file for the binary. Did run.tmpl.sh change?")
+        sys.exit(1)
+
+pex_builder.set_shebang(options.python_shebang)
+
+pex_info = pex_builder.info
+pex_info.inject_env = options.inject_env
+pex_info.interpreter_constraints = [
+    InterpreterConstraint.parse(constraint)
+    for constraint in options.constraints
+]
+
+for dep in options.dependencies:
+    dist = Distribution.load(dep + "/../")
+
+    # TODO: explain why the private `_add_dist` API has to be called directly here.
+    key = "%s-%s" % (dist.key, dist.version)
+    dist_hash = pex_builder._add_dist(
+        path= dist.location,
+        dist_name = key
+    )
+    pex_info.add_distribution(key, dist_hash)
+    pex_builder.add_requirement(dist.as_requirement())
+
+for source in options.sources:
+    src, dest = source.split("=", 1)
+
+    # if destination path matches the entrypoint script, then also set the executable.
+    if dest == executable:
+        pex_builder.set_executable(src)
+        executable_was_set = True
+
+    pex_builder.add_source(
+        src,
+        dest
+    )
+
+if not executable_was_set:
+    print("Have not seen the source that corresponds to %s in the runfiles. Please file an issue." % executable)
+    sys.exit(1)
+
+pex_builder.freeze(bytecode_compile=False)
+
+pex_builder.build(
+    options.pex_name,
+    deterministic_timestamp=True,
+    layout=Layout.ZIPAPP,
+    check=Check.WARN,
+)
+
+
+# Cleanup temporary pex root
+safe_rmtree(TMP_PEX_ROOT)