From cf1a2bf5cdf1224984ebd9adb844c9c5ae952391 Mon Sep 17 00:00:00 2001 From: Aaron Siddhartha Mondal Date: Mon, 1 Jul 2024 23:42:11 +0200 Subject: [PATCH] Introduce the NativeLink Kubernetes operator A single `kubectl apply -k` now deploys NativeLink in a self-configuring, self-healing and self-updating fashion. To achieve this we implement a two-stage deployment to asynchronously reconcile the various parts of NativeLink Kustomizations. First, we deploy Flux Alerts that trigger Tekton Pipelines on GitRepository updates to bring required images into the cluster. Second, and technically at the same time, we start a Flux Kustomization to deploy a NativeLink Kustomization. This is similar to the previous 01_operations and 02_application scripts, but now happens fully automatically in the cluster and no longer requires a local Nix installation, as all tag evaluations have become implementation details of the Tekton Pipelines. This commit also changes the K8s resource layout to a "best-practice" Kustomize directory layout. This further reduces code duplication and gives third parties greater flexibility and more useful reference points to build custom NativeLink setups. Includes an overhaul of the Kubernetes documentation. --- .../vocabularies/TraceMachina/accept.txt | 1 - .github/workflows/lre.yaml | 95 ++++++++- README.md | 1 + .../chromium-example/build_chromium_tests.sh | 0 deploy/chromium-example/kustomization.yaml | 21 ++ deploy/dev/kustomization.yaml | 64 ++++++ deploy/kubernetes-example/kustomization.yaml | 21 ++ deployment-examples/chromium/.gitignore | 2 - deployment-examples/chromium/01_operations.sh | 39 ---- .../chromium/02_application.sh | 30 --- .../chromium/04_delete_application.sh | 6 - deployment-examples/chromium/README.md | 95 --------- deployment-examples/kubernetes/.gitignore | 2 - .../kubernetes/01_operations.sh | 39 ---- .../kubernetes/02_application.sh | 39 ---- .../kubernetes/03_delete_application.sh | 6 - deployment-examples/kubernetes/README.md | 163 --------------- .../kubernetes/base/kustomization.yaml | 27 --- .../kubernetes/worker-lre-java.yaml | 69 ------- flake.nix | 7 + kubernetes/README.md | 13 ++ .../kubernetes => kubernetes}/base/cas.yaml | 0 kubernetes/base/kustomization.yaml | 13 ++ .../base/scheduler.yaml | 0 .../base/worker.yaml | 20 +- .../gateway-routes/kustomization.yaml | 5 + .../components/gateway-routes}/routes.yaml | 0 .../example-do-not-use-in-prod-key.pem | 0 .../example-do-not-use-in-prod-rootca.crt | 0 .../insecure-certs/kustomization.yaml | 9 + .../components/operator/flux-config.yaml | 110 ++++++++++ .../components/operator/kustomization.yaml | 4 + .../base => kubernetes/configmaps}/cas.json | 0 kubernetes/configmaps/kustomization.yaml | 23 +++ .../configmaps}/scheduler.json | 0 .../configmaps}/worker.json | 0 .../overlays/chromium/kustomization.yaml | 20 ++ kubernetes/overlays/lre/kustomization.yaml | 26 +++ .../overlays/lre}/worker-lre-cc.yaml | 38 +--- tools/pre-commit-hooks.nix | 2 +- web/platform/.gitignore | 2 - .../docs/deployment-examples/chromium.mdx | 118 +++++++++++ .../docs/deployment-examples/kubernetes.mdx | 189 ++++++++++++++++++ web/platform/starlight.conf.ts | 5 +- web/platform/utils/md_to_mdx_aot.ts | 16 -- 45 files changed, 755 insertions(+), 585 deletions(-) rename deployment-examples/chromium/03_build_chrome_tests.sh => deploy/chromium-example/build_chromium_tests.sh (100%) create mode 100644 deploy/chromium-example/kustomization.yaml create mode 100644 deploy/dev/kustomization.yaml create mode 100644
deploy/kubernetes-example/kustomization.yaml delete mode 100644 deployment-examples/chromium/.gitignore delete mode 100755 deployment-examples/chromium/01_operations.sh delete mode 100755 deployment-examples/chromium/02_application.sh delete mode 100755 deployment-examples/chromium/04_delete_application.sh delete mode 100644 deployment-examples/chromium/README.md delete mode 100644 deployment-examples/kubernetes/.gitignore delete mode 100755 deployment-examples/kubernetes/01_operations.sh delete mode 100755 deployment-examples/kubernetes/02_application.sh delete mode 100755 deployment-examples/kubernetes/03_delete_application.sh delete mode 100644 deployment-examples/kubernetes/README.md delete mode 100644 deployment-examples/kubernetes/base/kustomization.yaml delete mode 100644 deployment-examples/kubernetes/worker-lre-java.yaml create mode 100644 kubernetes/README.md rename {deployment-examples/kubernetes => kubernetes}/base/cas.yaml (100%) create mode 100644 kubernetes/base/kustomization.yaml rename {deployment-examples/kubernetes => kubernetes}/base/scheduler.yaml (100%) rename deployment-examples/chromium/worker-chromium.yaml => kubernetes/base/worker.yaml (79%) create mode 100644 kubernetes/components/gateway-routes/kustomization.yaml rename {deployment-examples/kubernetes/base => kubernetes/components/gateway-routes}/routes.yaml (100%) rename {deployment-examples/kubernetes/base => kubernetes/components/insecure-certs}/example-do-not-use-in-prod-key.pem (100%) rename {deployment-examples/kubernetes/base => kubernetes/components/insecure-certs}/example-do-not-use-in-prod-rootca.crt (100%) create mode 100644 kubernetes/components/insecure-certs/kustomization.yaml create mode 100644 kubernetes/components/operator/flux-config.yaml create mode 100644 kubernetes/components/operator/kustomization.yaml rename {deployment-examples/kubernetes/base => kubernetes/configmaps}/cas.json (100%) create mode 100644 kubernetes/configmaps/kustomization.yaml rename {deployment-examples/kubernetes/base => kubernetes/configmaps}/scheduler.json (100%) rename {deployment-examples/kubernetes/base => kubernetes/configmaps}/worker.json (100%) create mode 100644 kubernetes/overlays/chromium/kustomization.yaml create mode 100644 kubernetes/overlays/lre/kustomization.yaml rename {deployment-examples/kubernetes => kubernetes/overlays/lre}/worker-lre-cc.yaml (63%) create mode 100644 web/platform/src/content/docs/docs/deployment-examples/chromium.mdx create mode 100644 web/platform/src/content/docs/docs/deployment-examples/kubernetes.mdx diff --git a/.github/styles/config/vocabularies/TraceMachina/accept.txt b/.github/styles/config/vocabularies/TraceMachina/accept.txt index 5c8c9bc49..30d280a46 100644 --- a/.github/styles/config/vocabularies/TraceMachina/accept.txt +++ b/.github/styles/config/vocabularies/TraceMachina/accept.txt @@ -5,7 +5,6 @@ Cloudflare ELB GPUs Goma -Kustomization [Hh]ermeticity Kustomization LLD diff --git a/.github/workflows/lre.yaml b/.github/workflows/lre.yaml index 7399603fa..38b3e468d 100644 --- a/.github/workflows/lre.yaml +++ b/.github/workflows/lre.yaml @@ -83,19 +83,104 @@ jobs: uses: >- # v4 DeterminateSystems/magic-nix-cache-action@fc6aaceb40b9845a02b91e059ec147e78d1b4e41 - - name: Start Kubernetes cluster (Infra) + - name: Start Kubernetes cluster run: > nix run .#native up - - name: Start Kubernetes cluster (Operations) + - name: Start NativeLink operator + env: + PR_URL: ${{ github.event.pull_request.head.repo.clone_url }} + PR_BRANCH: ${{ github.event.pull_request.head.ref }} + 
PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: | + nix develop --impure --command bash -c 'cat > kustomization.yaml << EOF + apiVersion: kustomize.config.k8s.io/v1beta1 + kind: Kustomization + components: + - kubernetes/components/operator + patches: + - patch: |- + - op: replace + path: /spec/path + value: ./kubernetes/overlays/lre + target: + kind: Kustomization + name: nativelink + - patch: |- + - op: replace + path: /spec/url + value: ${PR_URL} + - op: replace + path: /spec/ref/branch + value: ${PR_BRANCH} + - op: replace + path: /spec/ref/commit + value: ${PR_COMMIT} + target: + kind: GitRepository + name: nativelink + - patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: ./src_root#image + target: + kind: Alert + name: nativelink-image-alert + - patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: ./src_root#nativelink-worker-init + target: + kind: Alert + name: nativelink-worker-init-alert + - patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: ./src_root#nativelink-worker-lre-cc + target: + kind: Alert + name: nativelink-worker-alert + EOF + kubectl apply -k . && + rm kustomization.yaml' + + - name: Wait for Tekton pipelines + run: > + nix develop --impure --command + bash -c "kubectl wait \ + --for=condition=Succeeded \ + --timeout=45m \ + pipelinerun \ + -l tekton.dev/pipeline=rebuild-nativelink" + + - name: Wait for Configmaps + run: > + nix develop --impure --command + bash -c "flux reconcile kustomization -n default \ + --timeout=15m \ + nativelink-configmaps" + + - name: Wait for NativeLink Kustomization + run: > + nix develop --impure --command + bash -c "flux reconcile kustomization -n default \ + --timeout=15m \ + nativelink" + + - name: Wait for CAS + run: > + nix develop --impure --command + bash -c "kubectl rollout status deploy/nativelink-cas" + + - name: Wait for scheduler run: > nix develop --impure --command - bash -c "./deployment-examples/kubernetes/01_operations.sh" + bash -c "kubectl rollout status deploy/nativelink-scheduler" - - name: Start Kubernetes cluster (Application) + - name: Wait for worker run: > nix develop --impure --command - bash -c "./deployment-examples/kubernetes/02_application.sh" + bash -c "kubectl rollout status deploy/nativelink-worker" - name: Get gateway IPs id: gateway-ips diff --git a/README.md b/README.md index 09bb84642..b7eeda2fe 100644 --- a/README.md +++ b/README.md @@ -59,6 +59,7 @@ To start, you can deploy NativeLink as a Docker image (as shown below) or by usi The setups below are **production-grade** installations. See the [contribution docs](https://nativelink.com/docs/contribute/nix/) for instructions on how to build from source with [Bazel](https://nativelink.com/docs/contribute/bazel/), [Cargo](https://nativelink.com/docs/contribute/cargo/), and [Nix](https://nativelink.com/docs/contribute/nix/). +You can find a few example deployments in the [Docs](https://nativelink.com/docs/guides/kubernetes). 
### 📦 Prebuilt images diff --git a/deployment-examples/chromium/03_build_chrome_tests.sh b/deploy/chromium-example/build_chromium_tests.sh similarity index 100% rename from deployment-examples/chromium/03_build_chrome_tests.sh rename to deploy/chromium-example/build_chromium_tests.sh diff --git a/deploy/chromium-example/kustomization.yaml b/deploy/chromium-example/kustomization.yaml new file mode 100644 index 000000000..d8c45a9f9 --- /dev/null +++ b/deploy/chromium-example/kustomization.yaml @@ -0,0 +1,21 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +components: +- ../../kubernetes/components/operator + +patches: +- patch: |- + - op: replace + path: /spec/path + value: ./kubernetes/overlays/chromium + target: + kind: Kustomization + name: nativelink +- patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: github:TraceMachina/nativelink#nativelink-worker-siso-chromium + target: + kind: Alert + name: nativelink-worker-alert diff --git a/deploy/dev/kustomization.yaml b/deploy/dev/kustomization.yaml new file mode 100644 index 000000000..35a3f183a --- /dev/null +++ b/deploy/dev/kustomization.yaml @@ -0,0 +1,64 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +components: +- ../../kubernetes/components/operator + +# Change this value to deploy custom overlays. +patches: +- patch: |- + - op: replace + path: /spec/path + value: ./kubernetes/overlays/lre + target: + kind: Kustomization + name: nativelink + +# Modify this value to change the URL of the repository with deployment files. +# +# This is usually only necessary if you change deployment YAML files or +# NativeLink config files. If you only intend to change the Rust sources you can +# leave this as is, but make sure that the Alerts below are patched to build +# your local sources. +- patch: |- + - op: replace + path: /spec/url + value: https://github.com/TraceMachina/nativelink +# Optionally, change the tracked branch. +# - op: replace +# path: /spec/ref/branch +# value: somecustombranch + target: + kind: GitRepository + name: nativelink + +# Setting the flake outputs to `./src_root#xxx` causes the Tekton pipelines to +# build nativelink from your local sources. +# +# During development, the following formats might be useful as well: +# +# `github:user/repo#outname` to build an image from an arbitrary flake output. +# +# `github:TraceMachina/nativelink?ref=pull//head#` to deploy +# outputs from a pull request.
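+#
+# As an illustrative sketch (the fork and output name below are hypothetical),
+# an Alert patch pointing at an arbitrary flake output would look like:
+#
+# - patch: |-
+#     - op: replace
+#       path: /spec/eventMetadata/flakeOutput
+#       value: github:someuser/nativelink#nativelink-worker-custom
+#   target:
+#     kind: Alert
+#     name: nativelink-worker-alert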
+- patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: ./src_root#image + target: + kind: Alert + name: nativelink-image-alert +- patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: ./src_root#nativelink-worker-init + target: + kind: Alert + name: nativelink-worker-init-alert +- patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: ./src_root#nativelink-worker-lre-cc + target: + kind: Alert + name: nativelink-worker-alert diff --git a/deploy/kubernetes-example/kustomization.yaml b/deploy/kubernetes-example/kustomization.yaml new file mode 100644 index 000000000..eb836d23e --- /dev/null +++ b/deploy/kubernetes-example/kustomization.yaml @@ -0,0 +1,21 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +components: +- ../../kubernetes/components/operator + +patches: +- patch: |- + - op: replace + path: /spec/path + value: ./kubernetes/overlays/lre + target: + kind: Kustomization + name: nativelink +- patch: |- + - op: replace + path: /spec/eventMetadata/flakeOutput + value: github:TraceMachina/nativelink#nativelink-worker-lre-cc + target: + kind: Alert + name: nativelink-worker-alert diff --git a/deployment-examples/chromium/.gitignore b/deployment-examples/chromium/.gitignore deleted file mode 100644 index e0e8ebd5b..000000000 --- a/deployment-examples/chromium/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by 02_application.yaml -/kustomization.yaml diff --git a/deployment-examples/chromium/01_operations.sh b/deployment-examples/chromium/01_operations.sh deleted file mode 100755 index b11905f6b..000000000 --- a/deployment-examples/chromium/01_operations.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# Trigger cluster-internal pipelines to build or fetch necessary images. - -set -xeuo pipefail - -curl -v \ - -H 'content-Type: application/json' \ - -d '{"metadata": {"flakeOutput": "./src_root#image"}}' \ - localhost:8082/eventlistener - -curl -v \ - -H 'content-Type: application/json' \ - -d '{"metadata": {"flakeOutput": "./src_root#nativelink-worker-init"}}' \ - localhost:8082/eventlistener - -curl -v \ - -H 'content-Type: application/json' \ - -d '{"metadata": {"flakeOutput": "./src_root#nativelink-worker-siso-chromium"}}' \ - localhost:8082/eventlistener - -until kubectl get pipelinerun \ - -l tekton.dev/pipeline=rebuild-nativelink | grep -q 'NAME'; do - echo "Waiting for PipelineRuns to start..." - sleep 0.1 -done - -printf "Waiting for PipelineRuns to finish... - -You may cancel this script now and use 'tkn pr ls' and 'tkn pr logs -f' to -monitor the PipelineRun logs. - -" - -kubectl wait \ - --for=condition=Succeeded \ - --timeout=45m \ - pipelinerun \ - -l tekton.dev/pipeline=rebuild-nativelink diff --git a/deployment-examples/chromium/02_application.sh b/deployment-examples/chromium/02_application.sh deleted file mode 100755 index 6d1e717e6..000000000 --- a/deployment-examples/chromium/02_application.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env bash - -# Prepare the Kustomization and apply it to the cluster. 
- -KUSTOMIZE_DIR=$(git rev-parse --show-toplevel)/deployment-examples/chromium - -cat < "$KUSTOMIZE_DIR"/kustomization.yaml ---- -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -bases: - - ../kubernetes/base - -resources: - - worker-chromium.yaml -EOF - -cd "$KUSTOMIZE_DIR" && kustomize edit set image \ - nativelink=localhost:5001/nativelink:"$(\ - nix eval .#image.imageTag --raw)" \ - nativelink-worker-init=localhost:5001/nativelink-worker-init:"$(\ - nix eval .#nativelink-worker-init.imageTag --raw)" \ - nativelink-worker-chromium=localhost:5001/nativelink-worker-siso-chromium:"$(\ - nix eval .#nativelink-worker-siso-chromium.imageTag --raw)" - -kubectl apply -k "$KUSTOMIZE_DIR" - -kubectl rollout status deploy/nativelink-cas -kubectl rollout status deploy/nativelink-scheduler -kubectl rollout status deploy/nativelink-worker-chromium diff --git a/deployment-examples/chromium/04_delete_application.sh b/deployment-examples/chromium/04_delete_application.sh deleted file mode 100755 index 681370f92..000000000 --- a/deployment-examples/chromium/04_delete_application.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -# Delete the Kustomization but leave the rest of the cluster intact. - -kubectl delete -k \ - "$(git rev-parse --show-toplevel)/deployment-examples/chromium" diff --git a/deployment-examples/chromium/README.md b/deployment-examples/chromium/README.md deleted file mode 100644 index 321a04a22..000000000 --- a/deployment-examples/chromium/README.md +++ /dev/null @@ -1,95 +0,0 @@ -# Chromium example - -This deployment sets up a 4-container deployment with separate CAS, scheduler -and worker. Don't use this example deployment in production. It's insecure. - -> [!WARNING] -> -> - The client build request is best done from a Ubuntu image, `./03_build_chrome_tests.sh`. It will check if the image is Ubuntu and -> fail otherwise. -> - This tutorial has been tested in a Nix environment of version `2. -> 21.0`. -> - You need to install the [Docker](https://docs.docker.com/engine/install/ubuntu/) Engine in Ubuntu. -> - To get your Nix environment set up see the [official Nix installation documentation](https://nix.dev/install-nix). - -All commands should be run from nix to ensure all dependencies exist in the environment. - -```bash -nix develop -``` - -In this example we're using `kind` to set up the cluster `cilium` to provide a -`LoadBalancer` and `GatewayController`. - -First set up a local development cluster: - -```bash -native up -``` - -> [!TIP] -> The `native up` command uses Pulumi under the hood. You can view and delete -> the stack with `pulumi stack` and `pulumi destroy`. - -Next start a few standard deployments. This part also builds the remote -execution containers and makes them available to the cluster: - -```bash -./01_operations.sh -``` - -> [!TIP] -> The operations invoke cluster-internal Tekton Pipelines to build and push the -> `nativelink` and worker images. You can view the state of the pipelines with -> `tkn pr ls` and `tkn pr logs`/`tkn pr logs --follow`. - -Finally, deploy NativeLink: - -```bash -./02_application.sh -``` - -> [!TIP] -> You can use `./04_delete_application.sh` to remove just the `nativelink` -> deployments but leave the rest of the cluster intact. 
- -This demo setup creates two gateways to expose the `cas` and `scheduler` -deployments via your local docker network: - -```bash -CACHE=$(kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}') -SCHEDULER=$(kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}') - -echo "Cache IP: $CACHE" -echo "Scheduler IP: $SCHEDULER" -``` - -Using `./03_build_chrome_tests.sh` example script will download needed dependencies -for building Chromium unit tests using NativeLink CAS and Scheduler. The initial part -of the script checks if some dependencies exist, if not installs them, then moves on -to downloading and building Chromium tests. The script simplifies the setup described -in [linux/build_instructions.md](https://chromium.googlesource.com/chromium/src/+/main/docs/linux/build_instructions.md) - -```bash -./03_build_chrome_tests.sh -``` - -> [!TIP] -> You can monitor the logs of container groups with `kubectl logs`: -> -> ```bash -> kubectl logs -f -l app=nativelink-cas -> kubectl logs -f -l app=nativelink-scheduler -> kubectl logs -f -l app=nativelink-worker-chromium --all-containers=true -> watch $HOME/chromium/src/buildtools/reclient/reproxystatus -> ``` - -When you're done testing, delete the cluster: - -```bash -kind delete cluster -``` - -## NativeLink Community - -If you have any questions, please reach out to the [NativeLink Community](https://join.slack.com/t/nativelink/shared_invite/zt-2i2mipfr5-lZAEeWYEy4Eru94b3IOcdg). diff --git a/deployment-examples/kubernetes/.gitignore b/deployment-examples/kubernetes/.gitignore deleted file mode 100644 index e0e8ebd5b..000000000 --- a/deployment-examples/kubernetes/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by 02_application.yaml -/kustomization.yaml diff --git a/deployment-examples/kubernetes/01_operations.sh b/deployment-examples/kubernetes/01_operations.sh deleted file mode 100755 index 360945c36..000000000 --- a/deployment-examples/kubernetes/01_operations.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# Trigger cluster-internal pipelines to build or fetch necessary images. - -set -xeuo pipefail - -curl -v \ - -H 'content-Type: application/json' \ - -d '{"metadata": {"flakeOutput": "./src_root#image"}}' \ - localhost:8082/eventlistener - -curl -v \ - -H 'content-Type: application/json' \ - -d '{"metadata": {"flakeOutput": "./src_root#nativelink-worker-init"}}' \ - localhost:8082/eventlistener - -curl -v \ - -H 'content-Type: application/json' \ - -d '{"metadata": {"flakeOutput": "./src_root#nativelink-worker-lre-cc"}}' \ - localhost:8082/eventlistener - -until kubectl get pipelinerun \ - -l tekton.dev/pipeline=rebuild-nativelink | grep -q 'NAME'; do - echo "Waiting for PipelineRuns to start..." - sleep 0.1 -done - -printf "Waiting for PipelineRuns to finish... - -You may cancel this script now and use 'tkn pr ls' and 'tkn pr logs -f' to -monitor the PipelineRun logs. - -" - -kubectl wait \ - --for=condition=Succeeded \ - --timeout=45m \ - pipelinerun \ - -l tekton.dev/pipeline=rebuild-nativelink diff --git a/deployment-examples/kubernetes/02_application.sh b/deployment-examples/kubernetes/02_application.sh deleted file mode 100755 index b444cf979..000000000 --- a/deployment-examples/kubernetes/02_application.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env bash - -# Prepare the Kustomization and apply it to the cluster. 
- -KUSTOMIZE_DIR=$(git rev-parse --show-toplevel)/deployment-examples/kubernetes - -cat < "$KUSTOMIZE_DIR"/kustomization.yaml ---- -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -bases: - - base - -resources: - - worker-lre-cc.yaml - # TODO(aaronmondal): Fix java and add this: - # - worker-lre-java.yaml -EOF - -cd "$KUSTOMIZE_DIR" && kustomize edit set image \ - nativelink=localhost:5001/nativelink:"$(\ - nix eval .#image.imageTag --raw)" \ - nativelink-worker-init=localhost:5001/nativelink-worker-init:"$(\ - nix eval .#nativelink-worker-init.imageTag --raw)" \ - nativelink-worker-lre-cc=localhost:5001/nativelink-worker-lre-cc:"$(\ - nix eval .#nativelink-worker-lre-cc.imageTag --raw)" - -# TODO(aaronmondal): Fix java and add this: -# nativelink-worker-lre-java=localhost:5001/nativelink-worker-lre-java:$(\ -# nix eval .#nativelink-worker-lre-java.imageTag --raw) - -kubectl apply -k "$KUSTOMIZE_DIR" - -kubectl rollout status deploy/nativelink-cas -kubectl rollout status deploy/nativelink-scheduler -kubectl rollout status deploy/nativelink-worker-lre-cc - -# TODO(aaronmondal): Fix java and add this: -# kubectl rollout status deploy/nativelink-worker-lre-java diff --git a/deployment-examples/kubernetes/03_delete_application.sh b/deployment-examples/kubernetes/03_delete_application.sh deleted file mode 100755 index c26119e86..000000000 --- a/deployment-examples/kubernetes/03_delete_application.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -# Delete the Kustomization but leave the rest of the cluster intact. - -kubectl delete -k \ - "$(git rev-parse --show-toplevel)/deployment-examples/kubernetes" diff --git a/deployment-examples/kubernetes/README.md b/deployment-examples/kubernetes/README.md deleted file mode 100644 index 140ecb0d4..000000000 --- a/deployment-examples/kubernetes/README.md +++ /dev/null @@ -1,163 +0,0 @@ -# Kubernetes example - -This deployment sets up a 4-container deployment with separate CAS, scheduler -and worker. Don't use this example deployment in production. It's insecure. - -In this example we're using `kind` to set up the cluster `cilium` to provide a -`LoadBalancer` and `GatewayController`. - -First set up a local development cluster: - -```bash -native up -``` - -> [!TIP] -> The `native up` command uses Pulumi under the hood. You can view and delete -> the stack with `pulumi stack` and `pulumi destroy`. - -Next start a few standard deployments. This part also builds the remote -execution containers and makes them available to the cluster: - -```bash -./01_operations.sh -``` - -> [!TIP] -> The operations invoke cluster-internal Tekton Pipelines to build and push the -> `nativelink` and worker images. You can view the state of the pipelines with -> `tkn pr ls` and `tkn pr logs`/`tkn pr logs --follow`. - -Finally, deploy NativeLink: - -```bash -./02_application.sh -``` - -> [!TIP] -> You can use `./03_delete_application.sh` to remove just the `nativelink` -> deployments but leave the rest of the cluster intact. 
- -This demo setup creates two gateways to expose the `cas` and `scheduler` -deployments via your local docker network: - -```bash -CACHE=$(kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}') -SCHEDULER=$(kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}') - -echo "Cache IP: $CACHE" -echo "Scheduler IP: $SCHEDULER" - -# Prints something like: -# -# Cache IP: 172.20.255.4 -# Scheduler IP: 172.20.255.5 -``` - -You can now pass these IP addresses to your Bazel invocation to use the remote -cache and executor: - -```bash -bazel build \ - --config=lre \ - --remote_instance_name=main \ - --remote_cache=grpc://$CACHE \ - --remote_executor=grpc://$SCHEDULER \ - //local-remote-execution/examples:hello_lre -``` - -> [!TIP] -> You can add these flags to a to a `user.bazelrc` file in the workspace root. -> Note that you'll need to pass in explicit IP addresses as this file can't -> resolve environment variables: -> -> ```bash -> # user.bazelrc -> build --config=lre -> build --remote_instance_name=main -> build --remote_cache=grpc://172.20.255.4 -> build --remote_executor=grpc://172.20.255.5 -> ``` - -When you're done testing, delete the cluster: - -```bash -kind delete cluster -``` - -## Use a published image - -[Published images](https://github.com/TraceMachina/nativelink/pkgs/container/nativelink) can be found under the Container registry, which uses the namespace `https://ghcr.io`. When using the Container registry, you can select prebuilt images and avoid building the image yourself. - -To pull an existing image, you can run: - -```sh -docker pull ghcr.io/tracemachina/nativelink:taggedImageVersion -``` - -## Derive a Tag for an OCI image - -To derive the tag of the NativeLink image at a specific commit, run the below command and change `someCommit` with the commit hash you want to use: - -```sh -nix eval github:TraceMachina/nativelink/someCommit#image.imageTag --raw -``` - -Alternatively, the tag can be derived from the upstream sources at the current state of the upstream main branch by running this command: - -```sh -nix eval github:TraceMachina/nativelink#image.imageTag --raw -``` - -Similarly, you can also clone or checkout a specific version or commit of the NativeLink git repository to evaluate the output of the entire NativeLink flake. For example, assuming you've done the [NativeLink Getting Started Guide](https://github.com/TraceMachina/nativelink?tab=readme-ov-file#getting-started-with-nativelink) and cloned the repository, you can run these sample commands: - -```sh -git log -git checkout commitHash -nix eval .#image.imageTag --raw -``` - -The `--raw` removes the surrounding quotes from the output string. - -> [!WARNING] -> We don't recommend using this command to -> retrieve an image: -> -> ```sh -> nix eval github:TraceMachina/nativelink#image.imageTag --raw -> ``` -> -> Using this command prevents anyone from -> identifying the specific version of the -> NativeLink container in use because -> reflects the image version available at the -> time of download. It'll be hard to debug, -> revert to previous versions if there are issues -> and complicate bug tracking. -> It's for these same reasons you won't be able -> to retrieve an image using the `latest` tag. - -## Build and copy an OCI image - -You can build and copy the image to a container registry with the `copyTo` attribute. 
Below are examples within the NativeLink repository for building and copying an image: - -- [Example 1](https://github.com/TraceMachina/nativelink/blob/09b32c94d3cc7780816585e9b87f69c56cf931ae/deployment-examples/kubernetes/01_operations.sh#L12-L16) highlights: - -```sh -nix run github:tracemachina/nativelink#image.copyTo -``` - -- [Example 2](https://github.com/TraceMachina/nativelink/blob/09b32c94d3cc7780816585e9b87f69c56cf931ae/tools/local-image-test.nix#L12-L13) highlights how to skip pushing to an intermediary registry by copying directly to the docker-daemon: - -```sh -IMAGE_NAME=$(nix eval .#image.imageName --raw) -IMAGE_TAG=$(nix eval .#image.imageTag --raw) - -nix run .#image.copyTo docker-daemon:"${IMAGE_NAME}":"${IMAGE_TAG}" -``` - -You can find more about details around [nix](https://github.com/nlewo/nix2container). Published images are signed using `cosign`. For more details of the verification process of publishing OCI images see [SECURITY.md](https://github.com/TraceMachina/nativelink/blob/main/SECURITY.md) - -## NativeLink Community - -Reach out to the [NativeLink Slack community](https://join.slack.com/t/nativelink/shared_invite/zt-2forhp5n9-L7dTD21nCSY9_IRteQvZmw) for any questions via #NativeLink! diff --git a/deployment-examples/kubernetes/base/kustomization.yaml b/deployment-examples/kubernetes/base/kustomization.yaml deleted file mode 100644 index 8bc3a9d24..000000000 --- a/deployment-examples/kubernetes/base/kustomization.yaml +++ /dev/null @@ -1,27 +0,0 @@ ---- -apiVersion: kustomize.config.k8s.io/v1beta1 -kind: Kustomization -resources: - - cas.yaml - - scheduler.yaml - - routes.yaml - -configMapGenerator: - - name: cas - files: - - cas.json - - name: scheduler - files: - - scheduler.json - - name: worker - files: - - worker.json - -images: - - name: nativelink - -secretGenerator: - - name: tls-secret - files: - - example-do-not-use-in-prod-rootca.crt - - example-do-not-use-in-prod-key.pem diff --git a/deployment-examples/kubernetes/worker-lre-java.yaml b/deployment-examples/kubernetes/worker-lre-java.yaml deleted file mode 100644 index d4c580481..000000000 --- a/deployment-examples/kubernetes/worker-lre-java.yaml +++ /dev/null @@ -1,69 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: nativelink-worker-lre-java -spec: - replicas: 1 - selector: - matchLabels: - app: nativelink-worker-lre-java - template: - metadata: - labels: - app: nativelink-worker-lre-java - spec: - initContainers: - - name: setup-entrypoint - image: nixpkgs/nix-flakes:latest - command: ["/bin/sh", "-c"] - # The kind setup mounts the nativelink repository into the kind nodes at - # `/mnt/src_root`. This ensures that the tags between the worker configs - # and bazel toolchains match when this setup is run in CI. - # - # WARNING: The platform is *not* necessarily the container that is - # actually deployed here. The generator container in this example was - # `rbe-autogen-lre-java:` and the platform was modified - # after the fact to be `lre-java:`. The deployed container - # we use as worker is - # `nativelink-worker-lre-java:` which is a - # completely separate extension of the `lre-java` base image. 
- args: - | - git config --global --add safe.directory "*" - NATIVELINK_WORKER_PLATFORM=docker://lre-java:$(nix eval /mnt/src_root#lre-java.imageTag --raw) && - printf '#!/bin/sh\nexport NATIVELINK_WORKER_PLATFORM=%s\nexec "$@"' "$NATIVELINK_WORKER_PLATFORM" > /entrypoint/entrypoint.sh && - chmod +x /entrypoint/entrypoint.sh - volumeMounts: - - name: entrypoint - mountPath: /entrypoint - - name: mnt - mountPath: /mnt - containers: - - name: nativelink-worker-lre-java - # This image will be edited by kustomize - image: nativelink-worker-lre-java - env: - - name: RUST_LOG - value: warn - - name: CAS_ENDPOINT - value: nativelink-cas - - name: SCHEDULER_ENDPOINT - value: nativelink-scheduler - volumeMounts: - - name: worker-config - mountPath: /worker.json - subPath: worker.json - - name: entrypoint - mountPath: /entrypoint - command: ["/entrypoint/entrypoint.sh"] - args: ["/bin/nativelink", "/worker.json"] - volumes: - - name: entrypoint - emptyDir: {} - - name: worker-config - configMap: - name: worker - - name: mnt - hostPath: - path: /mnt diff --git a/flake.nix b/flake.nix index 693842b6a..f9f70a972 100644 --- a/flake.nix +++ b/flake.nix @@ -237,6 +237,11 @@ native-cli = pkgs.callPackage ./native-cli/default.nix {}; + build-chromium-tests = + pkgs.writeShellScriptBin + "build-chromium-tests" + ./deploy/chromium-example/build_chromium_tests.sh; + docs = pkgs.callPackage ./tools/docs.nix {rust = stable-rust.default;}; inherit (nix2container.packages.${system}.nix2container) pullImage; @@ -505,6 +510,7 @@ pkgs.tektoncd-cli pkgs.pulumi pkgs.pulumiPackages.pulumi-language-go + pkgs.fluxcd pkgs.go pkgs.kustomize @@ -521,6 +527,7 @@ customClang native-cli docs + build-chromium-tests ] ++ maybeDarwinDeps ++ pkgs.lib.optionals (pkgs.stdenv.system != "x86_64-darwin") [ diff --git a/kubernetes/README.md b/kubernetes/README.md new file mode 100644 index 000000000..dd516969a --- /dev/null +++ b/kubernetes/README.md @@ -0,0 +1,13 @@ +# NativeLink Kubernetes deployments + +Building blocks for NativeLink Kubernetes deployments. + +This directory does **not** contain a one-size-fits-all solution like a Helm +chart - infrastructure requirements are too diverse for a single setup to +reliably cover all potential use-cases. + +Instead, we provide useful building blocks in the form of Kustomizations. +Downstream implementers might use them as reference points to patch in the +functionality they require, as sketched below. + +See the `deploy` directory for concrete example deployments.
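+
+As a minimal sketch (not a prescriptive setup), a downstream overlay's
+`kustomization.yaml` that consumes the base together with both components
+might look like this; the worker image name and tag are hypothetical
+placeholders:
+
+```yaml
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+# Paths are relative to the overlay's location in this repository.
+resources:
+- ../../base
+
+components:
+- ../../components/gateway-routes
+- ../../components/insecure-certs
+
+images:
+# Hypothetical override: point this at your own worker image and tag.
+- name: nativelink-worker
+  newName: localhost:5001/my-custom-worker
+  newTag: some-tag
+```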
diff --git a/deployment-examples/kubernetes/base/cas.yaml b/kubernetes/base/cas.yaml similarity index 100% rename from deployment-examples/kubernetes/base/cas.yaml rename to kubernetes/base/cas.yaml diff --git a/kubernetes/base/kustomization.yaml b/kubernetes/base/kustomization.yaml new file mode 100644 index 000000000..92d4e29f7 --- /dev/null +++ b/kubernetes/base/kustomization.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - cas.yaml + - scheduler.yaml + - worker.yaml + - ../configmaps + +images: + - name: nativelink + - name: nativelink-worker-init + - name: nativelink-worker diff --git a/deployment-examples/kubernetes/base/scheduler.yaml b/kubernetes/base/scheduler.yaml similarity index 100% rename from deployment-examples/kubernetes/base/scheduler.yaml rename to kubernetes/base/scheduler.yaml diff --git a/deployment-examples/chromium/worker-chromium.yaml b/kubernetes/base/worker.yaml similarity index 79% rename from deployment-examples/chromium/worker-chromium.yaml rename to kubernetes/base/worker.yaml index be3d45076..dcf57bc2c 100644 --- a/deployment-examples/chromium/worker-chromium.yaml +++ b/kubernetes/base/worker.yaml @@ -2,16 +2,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: nativelink-worker-chromium + name: nativelink-worker spec: replicas: 3 selector: matchLabels: - app: nativelink-worker-chromium + app: nativelink-worker template: metadata: labels: - app: nativelink-worker-chromium + app: nativelink-worker spec: initContainers: - name: nativelink-worker-init @@ -21,11 +21,10 @@ spec: volumeMounts: - name: shared mountPath: /shared - containers: - - name: nativelink-worker-chromium + - name: nativelink-worker # This image will be edited by kustomize. - image: nativelink-worker-chromium + image: nativelink-worker env: - name: RUST_LOG value: info @@ -47,3 +46,12 @@ spec: - name: worker-config configMap: name: worker +--- +apiVersion: v1 +kind: Service +metadata: + name: nativelink-worker +spec: + selector: + app: nativelink-worker + clusterIP: None diff --git a/kubernetes/components/gateway-routes/kustomization.yaml b/kubernetes/components/gateway-routes/kustomization.yaml new file mode 100644 index 000000000..42b112b7e --- /dev/null +++ b/kubernetes/components/gateway-routes/kustomization.yaml @@ -0,0 +1,5 @@ +--- +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component +resources: + - routes.yaml diff --git a/deployment-examples/kubernetes/base/routes.yaml b/kubernetes/components/gateway-routes/routes.yaml similarity index 100% rename from deployment-examples/kubernetes/base/routes.yaml rename to kubernetes/components/gateway-routes/routes.yaml diff --git a/deployment-examples/kubernetes/base/example-do-not-use-in-prod-key.pem b/kubernetes/components/insecure-certs/example-do-not-use-in-prod-key.pem similarity index 100% rename from deployment-examples/kubernetes/base/example-do-not-use-in-prod-key.pem rename to kubernetes/components/insecure-certs/example-do-not-use-in-prod-key.pem diff --git a/deployment-examples/kubernetes/base/example-do-not-use-in-prod-rootca.crt b/kubernetes/components/insecure-certs/example-do-not-use-in-prod-rootca.crt similarity index 100% rename from deployment-examples/kubernetes/base/example-do-not-use-in-prod-rootca.crt rename to kubernetes/components/insecure-certs/example-do-not-use-in-prod-rootca.crt diff --git a/kubernetes/components/insecure-certs/kustomization.yaml b/kubernetes/components/insecure-certs/kustomization.yaml new file mode 100644 index 
000000000..bc92cb026 --- /dev/null +++ b/kubernetes/components/insecure-certs/kustomization.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component + +secretGenerator: + - name: tls-secret + files: + - example-do-not-use-in-prod-rootca.crt + - example-do-not-use-in-prod-key.pem diff --git a/kubernetes/components/operator/flux-config.yaml b/kubernetes/components/operator/flux-config.yaml new file mode 100644 index 000000000..c016867f5 --- /dev/null +++ b/kubernetes/components/operator/flux-config.yaml @@ -0,0 +1,110 @@ +--- +apiVersion: source.toolkit.fluxcd.io/v1 +kind: GitRepository +metadata: + name: nativelink + namespace: default +spec: + interval: 2m + url: https://github.com/TraceMachina/nativelink + ref: + branch: main +--- +apiVersion: notification.toolkit.fluxcd.io/v1beta3 +kind: Provider +metadata: + name: nativelink-webhook + namespace: flux-system +spec: + type: generic + address: http://el-nativelink-rebuild.default.svc.cluster.local:8080 +--- +apiVersion: notification.toolkit.fluxcd.io/v1beta3 +kind: Alert +metadata: + name: nativelink-image-alert + namespace: flux-system +spec: + eventSeverity: info + eventSources: + - kind: GitRepository + name: '*' + namespace: default + providerRef: + name: nativelink-webhook + eventMetadata: + flakeOutput: github:TraceMachina/nativelink#image +--- +apiVersion: notification.toolkit.fluxcd.io/v1beta3 +kind: Alert +metadata: + name: nativelink-worker-init-alert + namespace: flux-system +spec: + eventSeverity: info + eventSources: + - kind: GitRepository + name: '*' + namespace: default + providerRef: + name: nativelink-webhook + eventMetadata: + flakeOutput: github:TraceMachina/nativelink#nativelink-worker-init +--- +apiVersion: notification.toolkit.fluxcd.io/v1beta3 +kind: Alert +metadata: + name: nativelink-worker-alert + namespace: flux-system +spec: + eventSeverity: info + eventSources: + - kind: GitRepository + name: '*' + namespace: default + providerRef: + name: nativelink-webhook + eventMetadata: + flakeOutput: "PLACEHOLDER_NATIVELINK_WORKER" +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: nativelink-configmaps + namespace: default +spec: + interval: 2m + path: "./kubernetes/configmaps" + prune: true + force: true + retryInterval: 20s + targetNamespace: default + wait: true + sourceRef: + kind: GitRepository + name: nativelink + namespace: default +--- +apiVersion: kustomize.toolkit.fluxcd.io/v1 +kind: Kustomization +metadata: + name: nativelink + namespace: default +spec: + interval: 2m + path: "PLACEHOLDER_NATIVELINK_KUSTOMIZATION" + prune: true + force: true + retryInterval: 20s + targetNamespace: default + wait: true + sourceRef: + kind: GitRepository + name: nativelink + namespace: default + postBuild: + substituteFrom: + - kind: ConfigMap + name: nativelink-image-tags + dependsOn: + - name: nativelink-configmaps diff --git a/kubernetes/components/operator/kustomization.yaml b/kubernetes/components/operator/kustomization.yaml new file mode 100644 index 000000000..916d215e1 --- /dev/null +++ b/kubernetes/components/operator/kustomization.yaml @@ -0,0 +1,4 @@ +apiVersion: kustomize.config.k8s.io/v1alpha1 +kind: Component +resources: +- flux-config.yaml diff --git a/deployment-examples/kubernetes/base/cas.json b/kubernetes/configmaps/cas.json similarity index 100% rename from deployment-examples/kubernetes/base/cas.json rename to kubernetes/configmaps/cas.json diff --git a/kubernetes/configmaps/kustomization.yaml b/kubernetes/configmaps/kustomization.yaml 
new file mode 100644 index 000000000..91752af24 --- /dev/null +++ b/kubernetes/configmaps/kustomization.yaml @@ -0,0 +1,23 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +configMapGenerator: + - name: cas + files: + - cas.json + options: + annotations: + kustomize.toolkit.fluxcd.io/substitute: disabled + - name: scheduler + files: + - scheduler.json + options: + annotations: + kustomize.toolkit.fluxcd.io/substitute: disabled + - name: worker + files: + - worker.json + options: + annotations: + kustomize.toolkit.fluxcd.io/substitute: disabled diff --git a/deployment-examples/kubernetes/base/scheduler.json b/kubernetes/configmaps/scheduler.json similarity index 100% rename from deployment-examples/kubernetes/base/scheduler.json rename to kubernetes/configmaps/scheduler.json diff --git a/deployment-examples/kubernetes/base/worker.json b/kubernetes/configmaps/worker.json similarity index 100% rename from deployment-examples/kubernetes/base/worker.json rename to kubernetes/configmaps/worker.json diff --git a/kubernetes/overlays/chromium/kustomization.yaml b/kubernetes/overlays/chromium/kustomization.yaml new file mode 100644 index 000000000..10837536b --- /dev/null +++ b/kubernetes/overlays/chromium/kustomization.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- ../../base + +components: +- ../../components/gateway-routes +- ../../components/insecure-certs + +images: + - name: nativelink + newName: localhost:5001/nativelink + newTag: ${NATIVELINK_TAG} + - name: nativelink-worker-init + newName: localhost:5001/nativelink-worker-init + newTag: ${NATIVELINK_WORKER_INIT_TAG} + - name: nativelink-worker + newName: localhost:5001/nativelink-worker-siso-chromium + newTag: ${NATIVELINK_WORKER_SISO_CHROMIUM_TAG} diff --git a/kubernetes/overlays/lre/kustomization.yaml b/kubernetes/overlays/lre/kustomization.yaml new file mode 100644 index 000000000..707804b4e --- /dev/null +++ b/kubernetes/overlays/lre/kustomization.yaml @@ -0,0 +1,26 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- ../../base + +components: +- ../../components/gateway-routes +- ../../components/insecure-certs + +patches: +- path: worker-lre-cc.yaml + target: + kind: Deployment + name: nativelink-worker + +images: + - name: nativelink + newName: localhost:5001/nativelink + newTag: ${NATIVELINK_TAG} + - name: nativelink-worker-init + newName: localhost:5001/nativelink-worker-init + newTag: ${NATIVELINK_WORKER_INIT_TAG} + - name: nativelink-worker + newName: localhost:5001/nativelink-worker-lre-cc + newTag: ${NATIVELINK_WORKER_LRE_CC_TAG} diff --git a/deployment-examples/kubernetes/worker-lre-cc.yaml b/kubernetes/overlays/lre/worker-lre-cc.yaml similarity index 63% rename from deployment-examples/kubernetes/worker-lre-cc.yaml rename to kubernetes/overlays/lre/worker-lre-cc.yaml index 36b79aed6..cbdb14c06 100644 --- a/deployment-examples/kubernetes/worker-lre-cc.yaml +++ b/kubernetes/overlays/lre/worker-lre-cc.yaml @@ -2,16 +2,10 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: nativelink-worker-lre-cc + name: nativelink-worker spec: replicas: 1 - selector: - matchLabels: - app: nativelink-worker-lre-cc template: - metadata: - labels: - app: nativelink-worker-lre-cc spec: initContainers: - name: setup-entrypoint @@ -39,44 +33,16 @@ spec: mountPath: /entrypoint - name: mnt mountPath: /mnt - - - name: nativelink-worker-init - # This image will be edited by kustomize. 
- image: nativelink-worker-init - args: ["/shared/nativelink"] - volumeMounts: - - name: shared - mountPath: /shared - containers: - - name: nativelink-worker-lre-cc - # This image will be edited by kustomize. - image: nativelink-worker-lre-cc - env: - - name: RUST_LOG - value: warn - - name: CAS_ENDPOINT - value: nativelink-cas - - name: SCHEDULER_ENDPOINT - value: nativelink-scheduler + - name: nativelink-worker volumeMounts: - - name: worker-config - mountPath: /worker.json - subPath: worker.json - name: entrypoint mountPath: /entrypoint - - name: shared - mountPath: /shared command: ["/entrypoint/entrypoint.sh"] args: ["/shared/nativelink", "/worker.json"] volumes: - - name: shared - emptyDir: {} - name: entrypoint emptyDir: {} - - name: worker-config - configMap: - name: worker - name: mnt hostPath: path: /mnt diff --git a/tools/pre-commit-hooks.nix b/tools/pre-commit-hooks.nix index b0b8c90e1..e690341ae 100644 --- a/tools/pre-commit-hooks.nix +++ b/tools/pre-commit-hooks.nix @@ -48,7 +48,7 @@ in { ++ [ # Integration testfiles not intended for production. "deployment-examples/docker-compose/example-do-not-use-in-prod-key.pem" - "deployment-examples/kubernetes/base/example-do-not-use-in-prod-key.pem" + "kubernetes/components/insecure-certs/example-do-not-use-in-prod-key.pem" ]; enable = true; name = "detect-private-key"; diff --git a/web/platform/.gitignore b/web/platform/.gitignore index f31c0722d..d8e51b8e7 100644 --- a/web/platform/.gitignore +++ b/web/platform/.gitignore @@ -39,9 +39,7 @@ utils/deno.d.ts src/content/docs/docs/contribute/docs.mdx src/content/docs/docs/contribute/guidelines.mdx src/content/docs/docs/explanations/lre.mdx -src/content/docs/docs/deployment-examples/chromium.mdx src/content/docs/docs/config/configuration-intro.mdx -src/content/docs/docs/deployment-examples/kubernetes.mdx src/content/docs/docs/introduction/setup.mdx src/content/docs/docs/reference/changelog.mdx src/content/docs/docs/reference/nativelink-config.mdx diff --git a/web/platform/src/content/docs/docs/deployment-examples/chromium.mdx b/web/platform/src/content/docs/docs/deployment-examples/chromium.mdx new file mode 100644 index 000000000..6d841eafe --- /dev/null +++ b/web/platform/src/content/docs/docs/deployment-examples/chromium.mdx @@ -0,0 +1,118 @@ +--- +title: NativeLink deployment example for Chromium +description: 'An example for building Chromium with NativeLink in Kubernetes.' +--- + +In this example you'll spin up a local Kubernetes cluster with NativeLink and +run a Chromium build against it. + +**Requirements** + +- An `x86_64-linux` system running a recent Ubuntu. Either "real" Linux or WSL2. +- A functional local Docker setup. +- A recent version of Nix with flake support, for instance installed via the + [next-gen Nix installer](https://github.com/NixOS/experimental-nix-installer). + +:::caution +This example doesn't work on macOS or on Linux distributions other than Ubuntu. +::: + +## ☁️ Prepare the cluster + +First, enter the NativeLink development environment: + +```bash +git clone https://github.com/TraceMachina/nativelink && \ + cd nativelink && \ + nix develop +``` + +This environment contains some cloud tooling, so you don't need to set up any +Kubernetes-related software yourself. + +Now, start the development cluster: + +```bash +native up +``` + +:::tip +The `native up` command uses Pulumi under the hood. You can view and delete +the stack with `pulumi stack` and `pulumi destroy`. If you're queried for a +stack password, press enter, as the password is an empty string.
+::: + +Next, deploy NativeLink to the cluster: + +```bash +kubectl apply -k \ + https://github.com/TraceMachina/nativelink//deploy/chromium-example +``` + +:::danger +This example is built for demo purposes only. It's not a secure production-grade +setup and will only work in the local development cluster created with +`native up`. + +One-liner production-grade setups are still under construction. +::: + +## ๐Ÿ”ญ Explore deployments + +The deployment might take a wile to boot up. You can monitor progress via the +dashboards that come with the development cluster: + +- [localhost:8080](http://localhost:8080): Cilium's Hubble UI to view the + cluster topology. NativeLink will be deployed into the `default` namespace. +- [localhost:8081](http://localhost:8081): The Tekton Dashboard to view the + progress of the in-cluster pipelines. You'll find the pipelines under the + `PipelineRuns` tab. +- [localhost:9000](http://localhost:9000): The Capacitor Dashboard to view Flux + Kustomizations. You can view NatieLink's logs here once it's fully deployed. + +In terminals, the following commands can be helpful to view deployment progress: + +- `tkn pr logs -f` to view the logs of a `PipelineRun` in the terminal. +- `flux get all -A` to view the state of the NativeLink deployments. +- Once NativeLink is deployed: + - `kubectl logs deploy/nativelink-cas` for the CAS (cache) logs. + - `kubectl logs deploy/nativelink-scheduler` for the scheduler logs. + - `kubectl logs deploy/nativelink-worker` for the worker logs. + +## ๐Ÿ—๏ธ Build against NativeLink + +The demo setup creates gateways to expose the `cas` and `scheduler` deployments +via your local docker network. The following command builds the Chromium tests +against the cluster: + +```bash +build-chromium-tests +``` + +The `build-chromium-tests` command simplifies the setup described in +[linux/build_instructions.md](https://chromium.googlesource.com/chromium/src/+/main/docs/linux/build_instructions.md). +After preparing the requirements, it runs a Reclient build against the cluster. + +:::note +See [`deploy/chromium-example/build_chromium_tests.sh`](https://github.com/TraceMachina/nativelink/blob/main/deploy/chromium-example/build_chromium_tests.sh) +for the script contents. +::: + +You can view Reclient's logs like so: + +```bash +watch $HOME/chromium/src/buildtools/reclient/reproxystatus +``` + +## ๐Ÿงน Clean up + +When you're done testing, delete the cluster: + +```bash +# Delete the kind cluster +native down + +# Remove the container registry and loadbalancer +docker container stop kind-registry | xargs docker rm +docker container stop kind-loadbalancer | xargs docker rm +``` diff --git a/web/platform/src/content/docs/docs/deployment-examples/kubernetes.mdx b/web/platform/src/content/docs/docs/deployment-examples/kubernetes.mdx new file mode 100644 index 000000000..0461d0f2d --- /dev/null +++ b/web/platform/src/content/docs/docs/deployment-examples/kubernetes.mdx @@ -0,0 +1,189 @@ +--- +title: Kubernetes example +description: 'An example setup for NativeLink in Kubernetes' +--- + +In this example you'll spin up a local Kubernetes cluster with NativeLink and +run some Bazel builds against it. + +**Requirements** + +- An `x86_64-linux` system. Either "real" Linux or WSL2. +- A functional local Docker setup. +- A recent version of Nix with flake support, for instance installed via the + [next-gen Nix installer](https://github.com/NixOS/experimental-nix-installer). + +:::caution +This example doesn't work on Mac yet. 
+::: + + +## โ˜๏ธ Prepare the cluster + +First, enter the NativeLink development environment: + +```bash +git clone https://github.com/aaronmondal/nativelink && \ + cd nativelink && \ + nix develop +``` + +This environment contains Bazel and some cloud tooling, so you don't need to set +up any kubernetes-related software yourself. + +Now, start the development cluster: + +```bash +native up +``` + +:::tip +The `native up` command uses Pulumi under the hood. You can view and delete +the stack with `pulumi stack` and `pulumi destroy`. If you're queried for a +stack password, press enter, as the password is an empty string. +::: + +Next, deploy NativeLink to the cluster: + +```bash +kubectl apply -k \ + https://github.com/TraceMachina/nativelink//deploy/kubernetes-example +``` + +:::danger +This example is built for demo purposes only. It's not a secure production-grade +setup and will only work in the local development cluster created with +`native up`. + +One-liner production-grade setups are still under construction. +::: + +## ๐Ÿ”ญ Explore deployments + +The deployment might take a wile to boot up. You can monitor progress via the +dashboards that come with the development cluster: + +- [localhost:8080](http://localhost:8080): Cilium's Hubble UI to view the + cluster topology. NativeLink will be deployed into the `default` namespace. +- [localhost:8081](http://localhost:8081): The Tekton Dashboard to view the + progress of the in-cluster pipelines. You'll find the pipelines under the + `PipelineRuns` tab. +- [localhost:9000](http://localhost:9000): The Capacitor Dashboard to view Flux + Kustomizations. You can view NatieLink's logs here once it's fully deployed. + +In terminals, the following commands can be helpful to view deployment progress: + +- `tkn pr logs -f` to view the logs of a `PipelineRun` in the terminal. +- `flux get all -A` to view the state of the NativeLink deployments. +- Once NativeLink is deployed: + - `kubectl logs deploy/nativelink-cas` for the CAS (cache) logs. + - `kubectl logs deploy/nativelink-scheduler` for the scheduler logs. + - `kubectl logs deploy/nativelink-worker` for the worker logs. + +## ๐Ÿ—๏ธ Build against NativeLink + +The demo setup creates gateways to expose the `cas` and `scheduler` deployments +via your local docker network. You can pass the Gateway addresses to Bazel +invocations to make builds run against the cluster: + +```bash +CACHE=$(kubectl get gtw cache-gateway -o=jsonpath='{.status.addresses[0].value}') +SCHEDULER=$(kubectl get gtw scheduler-gateway -o=jsonpath='{.status.addresses[0].value}') + +echo "Cache IP: $CACHE" +echo "Scheduler IP: $SCHEDULER" + +bazel build \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://$CACHE \ + --remote_executor=grpc://$SCHEDULER \ + //local-remote-execution/examples:hello_lre +``` + +:::caution +While the Dashboard ports are static, the NativeLink endpoints aren't (yet). +If you shut down the cluster and reboot it, the `$CACHE` and `$SCHEDULER` IP +addresses will change. +::: + +:::tip +You can add these flags to a to a `.bazelrc.user` file in the workspace root. 
Note that you'll need to pass in explicit IP addresses as this file can't +resolve environment variables: +```bash +# .bazelrc.user +build --config=lre +build --remote_instance_name=main +build --remote_cache=grpc://172.20.255.4 +build --remote_executor=grpc://172.20.255.5 +``` + +```bash +# .bazelrc +try-import %workspace%/.bazelrc.user +``` +::: + +The crucial part is this bit: + +```txt +INFO: 11 processes: 9 internal, 2 remote. +``` + +It tells us that the compilation ran against the cluster. Let's clean the Bazel +cache and run the build again: + +```bash +bazel clean && bazel build \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://$CACHE \ + --remote_executor=grpc://$SCHEDULER \ + //local-remote-execution/examples:hello_lre +``` + +The build now shows cache hits instead of remote actions: + +```txt +INFO: 11 processes: 2 remote cache hit, 9 internal. +``` + +## 🚀 Bonus: Local Remote Execution + +The worker deployment in this example leverages [Local Remote Execution](../explanations/lre). + +Local Remote Execution mirrors toolchains for remote execution in your local +development environment. This lets you reuse build artifacts with virtually +perfect cache hit rate across different repositories, developers, and CI. + +To test LRE in the cluster, clean the local cache and invoke another build +against the cluster, but this time omit the `remote_executor` flag. This way +you'll use remote caching without remote execution: + +```bash +bazel clean && bazel build \ + --config=lre \ + --remote_instance_name=main \ + --remote_cache=grpc://$CACHE \ + //local-remote-execution/examples:hello_lre +``` + +You'll get remote cache hits as if your local machine were a `nativelink-worker`: + +```txt +INFO: 11 processes: 2 remote cache hit, 9 internal. +``` + +## 🧹 Clean up + +When you're done testing, delete the cluster: + +```bash +# Delete the kind cluster +native down + +# Remove the container registry and loadbalancer +docker container stop kind-registry | xargs docker rm +docker container stop kind-loadbalancer | xargs docker rm +``` diff --git a/web/platform/starlight.conf.ts b/web/platform/starlight.conf.ts index 97fc25920..c42c116c8 100644 --- a/web/platform/starlight.conf.ts +++ b/web/platform/starlight.conf.ts @@ -244,7 +244,10 @@ export const starlightConfig = { items: [ { label: "Docs", link: `${docsRoot}/introduction/setup` }, { label: "NativeLink Cloud", link: "https://app.nativelink.com/" }, - { label: "Coverage", link: "https://tracemachina.github.io/nativelink" }, + { + label: "Coverage", + link: "https://tracemachina.github.io/nativelink", + }, ], }, ], diff --git a/web/platform/utils/md_to_mdx_aot.ts b/web/platform/utils/md_to_mdx_aot.ts index c3f614a20..88b0b3daf 100644 --- a/web/platform/utils/md_to_mdx_aot.ts +++ b/web/platform/utils/md_to_mdx_aot.ts @@ -82,22 +82,6 @@ const filesToConvert: ConvertFileType[] = [ description: "NativeLink configuration guide", }, }, - { - input: `${rootDir}/deployment-examples/chromium/README.md`, - output: `${docsDir}/deployment-examples/chromium.mdx`, - docs: { - title: "NativeLink deployment example for Chromium", - description: "NativeLink deployment example for Chromium", - }, - }, - { - input: `${rootDir}/deployment-examples/kubernetes/README.md`, - output: `${docsDir}/deployment-examples/kubernetes.mdx`, - docs: { - title: "Local Remote Execution architecture", - description: "Local Remote Execution architecture", - }, - }, { input: `${rootDir}/CHANGELOG.md`, output: `${docsDir}/reference/changelog.mdx`,