From bfb658d2a6b4d2fbc6817bf2fcaaa214d8d8d617 Mon Sep 17 00:00:00 2001
From: "Ruben L. Mendoza"
Date: Thu, 31 Aug 2023 15:06:58 -0500
Subject: [PATCH] Changeset replication container (#299)

* Add changeset replication container

* Add charts template to deploy the changeset-replication container

* Update start files for changeset replication
---
NOTE(review): indentation in this patch was restored from a whitespace-collapsed copy and
should be confirmed against the repository. The blob ids on the `index` lines were not
recomputed after the Dockerfile and start.sh edits.

 .dockerignore                                 |  2 +-
 .gitignore                                    |  1 +
 chartpress.yaml                               |  4 +-
 compose/replication.yml                       | 30 ++++---
 images/changeset-replication-job/Dockerfile   | 20 +++++
 images/changeset-replication-job/start.sh     | 84 +++++++++++++++++++
 .../changeset-replication-job-deployment.yaml | 75 +++++++++++++++++
 osm-seed/values.yaml                          | 19 +++++
 8 files changed, 222 insertions(+), 13 deletions(-)
 create mode 100644 images/changeset-replication-job/Dockerfile
 create mode 100755 images/changeset-replication-job/start.sh
 create mode 100644 osm-seed/templates/jobs/changeset-replication-job-deployment.yaml

diff --git a/.dockerignore b/.dockerignore
index 5206d53f..e01842f5 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -41,5 +41,5 @@ data/
 # ignore all markdown files (md) beside all README*.md
 *.md
 !README*.md
-
+changeset-replication-job/config.yaml
 envs/
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index cb434b46..c2eaf90e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,3 +35,4 @@ tiler-server/imposm/
 overpass-api-db/
 data/*/
 !data/README.md
+images/changeset-replication-job/config.yaml
\ No newline at end of file
diff --git a/chartpress.yaml b/chartpress.yaml
index 1300479f..98f0f471 100644
--- a/chartpress.yaml
+++ b/chartpress.yaml
@@ -39,4 +39,6 @@ charts:
       taginfo:
         valuesPath: taginfo.image
       osm-simple-metrics:
-        valuesPath: osmSimpleMetrics.image
\ No newline at end of file
+        valuesPath: osmSimpleMetrics.image
+      changeset-replication-job:
+        valuesPath: changesetReplicationJob.image
\ No newline at end of file
diff --git a/compose/replication.yml b/compose/replication.yml
index 76d1e664..6cdba218 100644
--- a/compose/replication.yml
+++ b/compose/replication.yml
@@ -9,16 +9,24 @@ services:
       context: ../images/replication-job
       dockerfile: Dockerfile
     volumes:
-    - ../data/replication-job-data:/mnt/data
+      - ../data/replication-job-data:/mnt/data
     command: >
-      /bin/bash -c "
-      echo Sleep the replication-job for 1 minute;
-      sleep 1m;
-      echo Creating the replication files!;
-      /start.sh
-      "
+      /bin/bash -c " echo Sleep the replication-job for 1 minute; sleep 1m; echo Creating the replication files!; /start.sh "
     env_file:
-    - ../envs/.env.db
-    - ../envs/.env.db-utils
-    - ../envs/.env.cloudprovider
-    
\ No newline at end of file
+      - ../envs/.env.db
+      - ../envs/.env.db-utils
+      - ../envs/.env.cloudprovider
+  changeset-replication-job:
+    image: osmseed-changeset-replication-job:v1
+    build:
+      context: ../images/changeset-replication-job
+      dockerfile: Dockerfile
+    volumes:
+      - ../data/changeset-replication-job-data:/mnt/changesets
+      # - ./../images/changeset-replication-job:/openstreetmap-changeset-replication
+    command: >
+      /bin/bash -c "./start.sh"
+    env_file:
+      - ../envs/.env.db
+      - ../envs/.env.db-utils
+      - ../envs/.env.cloudprovider
diff --git a/images/changeset-replication-job/Dockerfile b/images/changeset-replication-job/Dockerfile
new file mode 100644
index 00000000..735073c3
--- /dev/null
+++ b/images/changeset-replication-job/Dockerfile
@@ -0,0 +1,20 @@
+FROM ruby:2.4
+# The upstream replication script is cloned into /app, which is also the working directory.
+RUN git clone https://github.com/zerebubuth/openstreetmap-changeset-replication.git /app
+WORKDIR /app
+# NOTE(review): there is no `apt-get update` before this install; presumably curl and unzip
+# already ship with the base image -- confirm before adding an update step.
+RUN apt-get install -y curl unzip
+# Install AWS CLI v2 and remove the downloaded archive and unpacked installer in the same layer.
+RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
+    && unzip awscliv2.zip \
+    && ./aws/install \
+    && rm -rf awscliv2.zip aws
+# TODO: Install Google Cloud Platform (GCP) and Azure CLI for better data management
+RUN gem install pg -v 0.21.0 --no-document
+RUN gem install libxml-ruby -v 3.1.0 --no-document
+COPY start.sh .
+# start.sh is the container entry point (see CMD), so it must be executable as well.
+RUN chmod +x replicate_changesets.rb start.sh
+# Exec form runs start.sh directly instead of wrapping it in `/bin/sh -c`.
+CMD ["/app/start.sh"]
diff --git a/images/changeset-replication-job/start.sh b/images/changeset-replication-job/start.sh
new file mode 100755
index 00000000..ffbc6390
--- /dev/null
+++ b/images/changeset-replication-job/start.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+# Runs replicate_changesets.rb in a loop and copies every freshly written file to the
+# storage selected by $CLOUDPROVIDER (aws, gcp or azure).
+set -e
+
+workingDirectory="/mnt/changesets"
+mkdir -p "$workingDirectory"
+CHANGESETS_REPLICATION_FOLDER="replication/changesets"
+
+# Creating config file
+echo "state_file: $workingDirectory/state.yaml
+db: host=$POSTGRES_HOST dbname=$POSTGRES_DB user=$POSTGRES_USER password=$POSTGRES_PASSWORD
+data_dir: $workingDirectory/" >/config.yaml
+
+# Verify the existence of the state.yaml file across all cloud providers. If it's not found, create a new one.
+if [ ! -f "$workingDirectory/state.yaml" ]; then
+  echo "File $workingDirectory/state.yaml does not exist in local storage"
+
+  if [ "$CLOUDPROVIDER" == "aws" ]; then
+    if aws s3 ls "$AWS_S3_BUCKET/$CHANGESETS_REPLICATION_FOLDER/state.yaml" >/dev/null 2>&1; then
+      echo "File exists, downloading from AWS - $AWS_S3_BUCKET"
+      aws s3 cp "$AWS_S3_BUCKET/$CHANGESETS_REPLICATION_FOLDER/state.yaml" "$workingDirectory/state.yaml"
+    fi
+  elif [ "$CLOUDPROVIDER" == "gcp" ]; then
+    if gsutil -q stat "$GCP_STORAGE_BUCKET/$CHANGESETS_REPLICATION_FOLDER/state.yaml"; then
+      echo "File exists, downloading from GCP - $GCP_STORAGE_BUCKET"
+      gsutil cp "$GCP_STORAGE_BUCKET/$CHANGESETS_REPLICATION_FOLDER/state.yaml" "$workingDirectory/state.yaml"
+    fi
+  elif [ "$CLOUDPROVIDER" == "azure" ]; then
+    state_file_exists=$(az storage blob exists --container-name "$AZURE_CONTAINER_NAME" --name "$CHANGESETS_REPLICATION_FOLDER/state.yaml" --query "exists" --output tsv)
+    if [ "$state_file_exists" == "true" ]; then
+      echo "File exists, downloading from Azure - $AZURE_CONTAINER_NAME"
+      az storage blob download --container-name "$AZURE_CONTAINER_NAME" --name "$CHANGESETS_REPLICATION_FOLDER/state.yaml" --file "$workingDirectory/state.yaml"
+    fi
+  fi
+  # No state file was downloaded above, so start from sequence 0
+  if [ ! -f "$workingDirectory/state.yaml" ]; then
+    echo "sequence: 0" >"$workingDirectory/state.yaml"
+  fi
+fi
+
+# Creating the replication files
+generateReplication() {
+  while true; do
+    # Run replication script
+    ruby replicate_changesets.rb /config.yaml
+
+    # Loop through newly created files. The paths arrive NUL-delimited on fd 3, so they are
+    # never word-split and the upload commands keep their own stdin.
+    while IFS= read -r -d '' -u 3 local_file; do
+      # Construct the cloud path for the file
+      cloud_file="$CHANGESETS_REPLICATION_FOLDER/${local_file#*"$workingDirectory"/}"
+
+      # Log file transfer
+      echo "$(date +%F_%H:%M:%S): Copying file $local_file to $cloud_file"
+
+      # Handle different cloud providers
+      case "$CLOUDPROVIDER" in
+      "aws")
+        aws s3 cp "$local_file" "$AWS_S3_BUCKET/$cloud_file" --acl public-read
+        ;;
+      "gcp")
+        gsutil cp -a public-read "$local_file" "$GCP_STORAGE_BUCKET/$cloud_file"
+        ;;
+      "azure")
+        az storage blob upload \
+          --container-name "$AZURE_CONTAINER_NAME" \
+          --file "$local_file" \
+          --name "$cloud_file" \
+          --output none
+        ;;
+      *)
+        echo "Unknown cloud provider: $CLOUDPROVIDER"
+        ;;
+      esac
+    done 3< <(find "$workingDirectory/" -type f -cmin -1 -print0)
+
+    # Sleep for 60 seconds before next iteration
+    sleep 60s
+  done
+}
+
+# Call the function to start the replication process
+generateReplication
diff --git a/osm-seed/templates/jobs/changeset-replication-job-deployment.yaml b/osm-seed/templates/jobs/changeset-replication-job-deployment.yaml
new file mode 100644
index 00000000..0f672714
--- /dev/null
+++ b/osm-seed/templates/jobs/changeset-replication-job-deployment.yaml
@@ -0,0 +1,75 @@
+{{- if .Values.changesetReplicationJob.enabled -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}-changeset-replication-job
+  labels:
+    app: {{ template "osm-seed.name" . }}
+    component: changeset-replication-job
+    environment: {{ .Values.environment }}
+    release: {{ .Release.Name }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: {{ template "osm-seed.name" . }}
+  template:
+    metadata:
+      labels:
+        app: {{ template "osm-seed.name" . }}
+    spec:
+      containers:
+      - name: {{ .Release.Name }}-changeset-replication-job-deployment
+        image: {{ .Values.changesetReplicationJob.image.name }}:{{ .Values.changesetReplicationJob.image.tag }}
+        # command: ['/start.sh']
+        {{- if .Values.changesetReplicationJob.resources.enabled }}
+        resources:
+          requests:
+            memory: {{ .Values.changesetReplicationJob.resources.requests.memory }}
+            cpu: {{ .Values.changesetReplicationJob.resources.requests.cpu }}
+          limits:
+            memory: {{ .Values.changesetReplicationJob.resources.limits.memory }}
+            cpu: {{ .Values.changesetReplicationJob.resources.limits.cpu }}
+        {{- end }}
+        env:
+          - name: POSTGRES_HOST
+            value: {{ .Release.Name }}-db
+          - name: POSTGRES_DB
+            value: {{ .Values.db.env.POSTGRES_DB }}
+          - name: POSTGRES_PASSWORD
+            value: {{ quote .Values.db.env.POSTGRES_PASSWORD }}
+          - name: POSTGRES_USER
+            value: {{ .Values.db.env.POSTGRES_USER }}
+          - name: REPLICATION_FOLDER
+            value: replication/minute
+          - name: CLOUDPROVIDER
+            value: {{ .Values.cloudProvider }}
+          # In case cloudProvider=aws
+          {{- if eq .Values.cloudProvider "aws" }}
+          - name: AWS_S3_BUCKET
+            value: {{ .Values.AWS_S3_BUCKET }}
+          {{- end }}
+          # In case cloudProvider=gcp
+          {{- if eq .Values.cloudProvider "gcp" }}
+          - name: GCP_STORAGE_BUCKET
+            value: {{ .Values.GCP_STORAGE_BUCKET }}
+          {{- end }}
+          # In case cloudProvider=azure
+          {{- if eq .Values.cloudProvider "azure" }}
+          - name: AZURE_STORAGE_ACCOUNT
+            value: {{ .Values.AZURE_STORAGE_ACCOUNT }}
+          - name: AZURE_CONTAINER_NAME
+            value: {{ .Values.AZURE_CONTAINER_NAME }}
+          - name: AZURE_STORAGE_CONNECTION_STRING
+            value: {{ .Values.AZURE_STORAGE_CONNECTION_STRING }}
+          {{- end }}
+          # Memory optimization for osmosis
+          {{- if .Values.changesetReplicationJob.resources.enabled }}
+          - name: MEMORY_JAVACMD_OPTIONS
+            value: {{ .Values.changesetReplicationJob.resources.requests.memory | default "2Gi" | quote}}
+          {{- end }}
+      {{- if .Values.changesetReplicationJob.nodeSelector.enabled }}
+      nodeSelector:
+        {{ .Values.changesetReplicationJob.nodeSelector.label_key }} : {{ .Values.changesetReplicationJob.nodeSelector.label_value }}
+      {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/osm-seed/values.yaml b/osm-seed/values.yaml
index cc24a0d1..fba87d3d 100644
--- a/osm-seed/values.yaml
+++ b/osm-seed/values.yaml
@@ -688,3 +688,22 @@ monitoringReplication:
       cpu: '2'
   nodeSelector:
     enabled: false
+
+# ====================================================================================================
+# Variables for changeset-replication-job, configuration to create the changeset replication files
+# ====================================================================================================
+changesetReplicationJob:
+  enabled: false
+  image:
+    name: ''
+    tag: ''
+  resources:
+    enabled: false
+    requests:
+      memory: '20Gi'
+      cpu: '8'
+    limits:
+      memory: '24Gi'
+      cpu: '10'
+  nodeSelector:
+    enabled: false