From 47b96f3bce7e88e925f33ec6c12916b5de36fc2a Mon Sep 17 00:00:00 2001
From: Chris Staite
Date: Thu, 14 Sep 2023 16:48:26 -0500
Subject: [PATCH] Add example for Reclient.

---
 .../docker-compose-reclient/README.md | 86 +++++++++++++++++++
 .../docker-compose.yml | 72 ++++++++++++++++
 .../local-storage-cas.json | 77 +++++++++++++++++
 .../run_in_container.sh | 17 ++++
 .../docker-compose-reclient/scheduler.json | 86 +++++++++++++++++++
 .../docker-compose-reclient/worker.json | 64 ++++++++++++++
 .../worker_precondition_script.sh | 11 +++
 7 files changed, 413 insertions(+)
 create mode 100644 deployment-examples/docker-compose-reclient/README.md
 create mode 100644 deployment-examples/docker-compose-reclient/docker-compose.yml
 create mode 100644 deployment-examples/docker-compose-reclient/local-storage-cas.json
 create mode 100755 deployment-examples/docker-compose-reclient/run_in_container.sh
 create mode 100644 deployment-examples/docker-compose-reclient/scheduler.json
 create mode 100644 deployment-examples/docker-compose-reclient/worker.json
 create mode 100755 deployment-examples/docker-compose-reclient/worker_precondition_script.sh

diff --git a/deployment-examples/docker-compose-reclient/README.md b/deployment-examples/docker-compose-reclient/README.md
new file mode 100644
index 000000000..147a033d9
--- /dev/null
+++ b/deployment-examples/docker-compose-reclient/README.md

# Reclient deployment

This example shows how to set up Turbo-Cache to work with Reclient for
Chromium builds. This configuration runs all of the services on a single
machine, which is of little use by itself, but it is a good starting point
from which to move the CAS and Worker roles onto other machines.

## Roles

There are a number of roles required for a Reclient setup:

 - Turbo-Cache Scheduler - The Turbo-Cache instance that communicates with the
   Goma proxy and handles scheduling of tasks to workers within the Turbo-Cache
   cluster. Because the Goma proxy is unable to talk to a separate scheduler
   and CAS endpoint, this instance also proxies CAS and AC requests to the
   storage instance.

 - Turbo-Cache CAS - The storage instance that stores all required build files
   and the build outputs. This also acts as the AC (action cache), which stores
   the results of previous actions.

 - Turbo-Cache Worker - There can be many of these and they perform the build
   work. They are rate-limited to one build per core using the `cpu_count`
   property. The `worker_precondition_script.sh` script ensures that a worker
   has sufficient RAM available before it accepts a build.

## Running

To start Turbo-Cache, simply execute `docker-compose up` from this directory.
A new Turbo-Cache instance with all of its backing services will be built and
started, with the scheduler listening on port 50052.
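For example, to build the images and run the whole stack in the background
(these are standard `docker-compose` flags; the service name comes from
`docker-compose.yml` below):

```sh
# Build the images and start all services in the background.
docker-compose up --build -d

# Follow the scheduler logs to confirm it is listening on port 50052.
docker-compose logs -f turbo_cache_scheduler
```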
Create the file `buildtools/reclient_cfgs/reproxy.cfg` in your Chromium
checkout with the following:

```
instance=main
service=127.0.0.1:50052
# Disables both TLS and authentication
service_no_security=true
# Required to stop autoninja from complaining about authentication, despite
# being implied by service_no_security
service_no_auth=true
# Try not to use local execution; this can't be set to 0, otherwise it
# appears to be ignored
local_resource_fraction=0.00001
log_format=reducedtext
automatic_auth=false
gcert_refresh_timeout=20
fail_early_min_action_count=4000
fail_early_min_fallback_ratio=0.5
deps_cache_max_mb=256
# TODO(b/276727504) Re-enable once noop build shutdown time bug is fixed
# enable_deps_cache=true
async_reproxy_termination=true
use_unified_uploads=true
fast_log_collection=true
depsscanner_address=exec://%s/buildtools/reclient/scandeps_server

# Improve upload/download concurrency
max_concurrent_streams_per_conn=50
max_concurrent_requests_per_conn=50
min_grpc_connections=50
cas_concurrency=1000

# Turbo-Cache doesn't currently support compressed blob upload
compression_threshold=-1
use_batches=false

# Metric metadata
metrics_namespace=main
```

Now that the environment is set up, you simply need to enable it for your
Chromium build by modifying your `args.gn` file to contain:

```
use_remoteexec=true
rbe_cfg_dir="../../buildtools/reclient_cfgs/linux"
```

Now run your build using `autoninja`, which will automatically start reproxy
and perform all of your execution for you.

You should set up a crontab entry on each worker to clean up stale Docker
images nightly:
```sh
# Example crontab entry (runs nightly at 3am):
#   0 3 * * * docker image prune -a --filter "until=24h" -f
docker image prune -a --filter "until=24h" -f
```

diff --git a/deployment-examples/docker-compose-reclient/docker-compose.yml b/deployment-examples/docker-compose-reclient/docker-compose.yml
new file mode 100644
index 000000000..fe2117c43
--- /dev/null
+++ b/deployment-examples/docker-compose-reclient/docker-compose.yml

# Copyright 2023 The Turbo Cache Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

version: '3.4'

services:
  # Storage backend providing the CAS and AC; the scheduler proxies
  # requests to this service.
  turbo_cache_local_cas:
    image: allada/turbo-cache:latest
    build:
      context: ../..
      dockerfile: ./deployment-examples/docker-compose/Dockerfile
      network: host
    volumes:
      - ${TURBO_CACHE_DIR:-~/.cache/turbo-cache}:/root/.cache/turbo-cache
      - type: bind
        source: .
        target: /root
    environment:
      RUST_LOG: ${RUST_LOG:-}
    ports: [ "50051:50051/tcp" ]
    command: |
      turbo-cache /root/local-storage-cas.json

  # Scheduler; this is the endpoint that reproxy talks to.
  turbo_cache_scheduler:
    image: allada/turbo-cache:latest
    volumes:
      - type: bind
        source: .
        target: /root
    environment:
      RUST_LOG: ${RUST_LOG:-}
      CAS_ENDPOINT: turbo_cache_local_cas
    ports: [ "50052:50052/tcp", "50061:50061/tcp" ]
    command: |
      turbo-cache /root/scheduler.json

  # Worker; executes build actions, optionally inside per-action Docker
  # containers.
  turbo_cache_executor:
    image: allada/turbo-cache-worker:latest
    build:
      context: ../..
      dockerfile: ./deployment-examples/docker-compose/Dockerfile
      network: host
      args:
        # Install Docker inside the worker image so that actions can be run
        # inside containers via the host's Docker socket.
        ADDITIONAL_SETUP_WORKER_CMD: >-
          DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y docker.io &&
          rm -rf /var/lib/apt/lists/*
    volumes:
      - ${HOME}/.cache/turbo-cache:/root/.cache/turbo-cache
      - type: bind
        source: .
        target: /root
      - type: bind
        source: /var/run/docker.sock
        target: /var/run/docker.sock
    environment:
      RUST_LOG: info
      CAS_ENDPOINT: turbo_cache_local_cas
      SCHEDULER_ENDPOINT: turbo_cache_scheduler
      TURBO_CACHE_DIR: ${HOME}/.cache/turbo-cache
    command: |
      turbo-cache /root/worker.json

diff --git a/deployment-examples/docker-compose-reclient/local-storage-cas.json b/deployment-examples/docker-compose-reclient/local-storage-cas.json
new file mode 100644
index 000000000..008f5c977
--- /dev/null
+++ b/deployment-examples/docker-compose-reclient/local-storage-cas.json

// This configuration will place objects in various folders in
// `~/.cache/turbo-cache`. It stores all data on disk, which allows the
// underlying service to be restarted, and keeps hot objects in memory to
// speed up frequently accessed paths.
{
  "stores": {
    "CAS_MAIN_STORE": {
      "fast_slow": {
        "fast": {
          "memory": {
            "eviction_policy": {
              // 1gb
              "max_bytes": 1000000000
            }
          }
        },
        "slow": {
          "filesystem": {
            "content_path": "/root/.cache/turbo-cache/content_path-cas",
            "temp_path": "/root/.cache/turbo-cache/tmp_path-cas",
            "eviction_policy": {
              // 5gb
              "max_bytes": 5000000000,
              // 200mb
              "evict_bytes": 200000000
            }
          }
        }
      }
    },
    "AC_MAIN_STORE": {
      "fast_slow": {
        "fast": {
          "memory": {
            "eviction_policy": {
              // 200mb
              "max_bytes": 200000000
            }
          }
        },
        "slow": {
          "filesystem": {
            "content_path": "/root/.cache/turbo-cache/content_path-cas_ac",
            "temp_path": "/root/.cache/turbo-cache/tmp_path-cas_ac",
            "eviction_policy": {
              // 1gb
              "max_bytes": 1000000000,
            }
          }
        }
      }
    }
  },
  "servers": [{
    "listen_address": "0.0.0.0:50051",
    "services": {
      "cas": {
        "main": {
          "cas_store": "CAS_MAIN_STORE"
        }
      },
      "ac": {
        "main": {
          "ac_store": "AC_MAIN_STORE"
        }
      },
      "capabilities": {},
      "bytestream": {
        "cas_stores": {
          "main": "CAS_MAIN_STORE",
        },
        // According to https://github.com/grpc/grpc.github.io/issues/371
        // 16KiB - 64KiB is optimal.
        "max_bytes_per_stream": 64000, // 64kb
      }
    }
  }]
}

diff --git a/deployment-examples/docker-compose-reclient/run_in_container.sh b/deployment-examples/docker-compose-reclient/run_in_container.sh
new file mode 100755
index 000000000..804992f45
--- /dev/null
+++ b/deployment-examples/docker-compose-reclient/run_in_container.sh

#!/bin/sh

# If a container is specified and starts with docker:// then run the command
# in Docker.
if [ "$(echo "$CONTAINER" | cut -c-9)" = "docker://" ]; then
  # The top level work directory is
  # /root/.cache/turbo-cache/work/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  # However, we are doing Docker-in-Docker, so the work directory actually
  # lives on the host and we need to account for that. The code below strips
  # off the /root/.cache/turbo-cache prefix and replaces it with $HOST_ROOT
  # from worker.json, which is in turn populated from ${TURBO_CACHE_DIR} in
  # docker-compose.yml.
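  # For example, with a hypothetical action whose working directory is
  #   /root/.cache/turbo-cache/work/<64-char-hex-id>/out/Default
  # characters 1-24 are the /root/.cache/turbo-cache prefix that gets
  # dropped, characters 25-94 are /work/<64-char-hex-id> (WORK_DIRECTORY),
  # and everything from character 95 onwards is /out/Default
  # (WORKING_DIRECTORY).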
  WORK_DIRECTORY=$(pwd | cut -c25-94)
  WORKING_DIRECTORY=$(pwd | cut -c95-)
  exec docker run --rm --network none \
    -w "/work${WORKING_DIRECTORY}" \
    -v "${HOST_ROOT}${WORK_DIRECTORY}:/work" \
    "$(echo "$CONTAINER" | cut -c10-)" \
    /bin/env "$@"
fi

# Default to simply running the command outside of a container.
exec "$@"

diff --git a/deployment-examples/docker-compose-reclient/scheduler.json b/deployment-examples/docker-compose-reclient/scheduler.json
new file mode 100644
index 000000000..f6e3015f1
--- /dev/null
+++ b/deployment-examples/docker-compose-reclient/scheduler.json

{
  "stores": {
    "GRPC_LOCAL_STORE": {
      "grpc": {
        "instance_name": "main",
        "endpoints": ["grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"],
        "store_type": "CAS"
      }
    },
    "GRPC_LOCAL_AC_STORE": {
      "grpc": {
        "instance_name": "main",
        "endpoints": ["grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"],
        "store_type": "AC"
      }
    }
  },
  "schedulers": {
    "MAIN_SCHEDULER": {
      "property_modifier": {
        "modifications": [
          {"Add": {"name": "cpu_count", "value": "1"}},
          {"Remove": "label:action_default"},
        ],
        "scheduler": {
          "simple": {
            "supported_platform_properties": {
              "cpu_count": "Minimum",
              "container-image": "Priority"
            }
          }
        }
      }
    }
  },
  "servers": [{
    "listen_address": "0.0.0.0:50052",
    "services": {
      "ac": {
        // Chromium default is:
        // projects/rbe-chrome-untrusted/instances/default_instance
        "main": {
          "ac_store": "GRPC_LOCAL_AC_STORE"
        }
      },
      "cas": {
        "main": {
          "cas_store": "GRPC_LOCAL_STORE"
        }
      },
      "bytestream": {
        "cas_stores": {
          "main": "GRPC_LOCAL_STORE"
        },
        "max_bytes_per_stream": 64000,
      },
      "execution": {
        "main": {
          "cas_store": "GRPC_LOCAL_STORE",
          "scheduler": "MAIN_SCHEDULER",
        }
      },
      "capabilities": {
        "main": {
          "remote_execution": {
            "scheduler": "MAIN_SCHEDULER",
          }
        }
      }
    }
  }, {
    "listen_address": "0.0.0.0:50061",
    "services": {
      // Note: This should be served on a different port, because it has a
      // different permission set than the other services. In other words,
      // this service is a backend API, whereas the ones above are a
      // frontend API.
      "worker_api": {
        "scheduler": "MAIN_SCHEDULER",
      },
      "prometheus": {
        "path": "/metrics"
      },
    }
  }]
}

diff --git a/deployment-examples/docker-compose-reclient/worker.json b/deployment-examples/docker-compose-reclient/worker.json
new file mode 100644
index 000000000..d1ec817a7
--- /dev/null
+++ b/deployment-examples/docker-compose-reclient/worker.json

{
  "stores": {
    "GRPC_LOCAL_STORE": {
      "grpc": {
        "instance_name": "main",
        "endpoints": ["grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"],
        "store_type": "CAS"
      }
    },
    "GRPC_LOCAL_AC_STORE": {
      "grpc": {
        "instance_name": "main",
        "endpoints": ["grpc://${CAS_ENDPOINT:-127.0.0.1}:50051"],
        "store_type": "AC"
      }
    },
    "WORKER_FAST_SLOW_STORE": {
      "fast_slow": {
        "fast": {
          "filesystem": {
            "content_path": "/root/.cache/turbo-cache/data-worker-test/content_path-cas",
            "temp_path": "/root/.cache/turbo-cache/data-worker-test/tmp_path-cas",
            "eviction_policy": {
              // 1gb
+ "max_bytes": 1000000000, + } + } + }, + "slow": { + "ref_store": { + "name": "GRPC_LOCAL_STORE", + } + } + } + } + }, + "workers": [{ + "local": { + "worker_api_endpoint": { + "uri": "grpc://${SCHEDULER_ENDPOINT:-127.0.0.1}:50061", + }, + "entrypoint_cmd": "/root/run_in_container.sh", + "additional_environment": { + "CONTAINER": {"Property": "container-image"}, + "HOST_ROOT": {"Value": "${TURBO_CACHE_DIR}"}, + }, + "cas_fast_slow_store": "WORKER_FAST_SLOW_STORE", + "ac_store": "GRPC_LOCAL_AC_STORE", + "work_directory": "/root/.cache/turbo-cache/work", + "platform_properties": { + "cpu_count": { + "query_cmd": "nproc" + }, + // Need to specify a placeholder here otherwise Priority scheduling does + // not work. + "container-image": { + "values": ["placeholder"] + } + }, + "precondition_script": "/root/worker_precondition_script.sh" + } + }], + "servers": [] +} diff --git a/deployment-examples/docker-compose-reclient/worker_precondition_script.sh b/deployment-examples/docker-compose-reclient/worker_precondition_script.sh new file mode 100755 index 000000000..93f50d57d --- /dev/null +++ b/deployment-examples/docker-compose-reclient/worker_precondition_script.sh @@ -0,0 +1,11 @@ +#!/bin/sh +set -eu + +AVAILABLE_MEMORY_KB=$(awk '/MemAvailable/ { printf "%d \n", $2 }' /proc/meminfo); +# At least 2Gb of RAM currently available +if [ $AVAILABLE_MEMORY_KB -gt 2000000 ]; then + exit 0 +else + echo "Available memory: ${AVAILABLE_MEMORY_KB}"; + exit 1 +fi