Skip to content

Commit

Permalink
Changeset replication container (#299)
Browse files Browse the repository at this point in the history
* Add changeset replication container

* Add charts template to deploy the changeset-replication container

* Update start files for changeset replication
  • Loading branch information
Ruben L. Mendoza authored Aug 31, 2023
1 parent 79a51f1 commit bfb658d
Show file tree
Hide file tree
Showing 8 changed files with 213 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,5 @@ data/
# ignore all markdown files (md) beside all README*.md
*.md
!README*.md

changeset-replication-job/config.yaml
envs/
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ tiler-server/imposm/
overpass-api-db/
data/*/
!data/README.md
images/changeset-replication-job/config.yaml
4 changes: 3 additions & 1 deletion chartpress.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,6 @@ charts:
taginfo:
valuesPath: taginfo.image
osm-simple-metrics:
valuesPath: osmSimpleMetrics.image
valuesPath: osmSimpleMetrics.image
changeset-replication-job:
valuesPath: changesetReplicationJob.image
30 changes: 19 additions & 11 deletions compose/replication.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,24 @@ services:
context: ../images/replication-job
dockerfile: Dockerfile
volumes:
- ../data/replication-job-data:/mnt/data
- ../data/replication-job-data:/mnt/data
command: >
/bin/bash -c "
echo Sleep the replication-job for 1 minute;
sleep 1m;
echo Creating the replication files!;
/start.sh
"
/bin/bash -c " echo Sleep the replication-job for 1 minute; sleep 1m; echo Creating the replication files!; /start.sh "
env_file:
- ../envs/.env.db
- ../envs/.env.db-utils
- ../envs/.env.cloudprovider

- ../envs/.env.db
- ../envs/.env.db-utils
- ../envs/.env.cloudprovider
changeset-replication-job:
  # Builds the image from ../images/changeset-replication-job and runs its
  # start.sh with the shared database / cloud-provider env files.
  image: osmseed-changeset-replication-job:v1
  build:
    context: ../images/changeset-replication-job
    dockerfile: Dockerfile
  volumes:
    # Host directory mounted at /mnt/changesets, the working directory used by start.sh.
    - ../data/changeset-replication-job-data:/mnt/changesets
    # - ./../images/changeset-replication-job:/openstreetmap-changeset-replication
  # Exec-list form of: /bin/bash -c "./start.sh"
  command: ['/bin/bash', '-c', './start.sh']
  env_file:
    - ../envs/.env.db
    - ../envs/.env.db-utils
    - ../envs/.env.cloudprovider
13 changes: 13 additions & 0 deletions images/changeset-replication-job/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Image for the changeset replication job: the upstream
# openstreetmap-changeset-replication Ruby script plus the AWS CLI, driven by
# start.sh.
FROM ruby:2.4

# Fetch the replication script into /app. The clone is not pinned to a commit,
# so every build tracks the upstream default branch; --depth 1 avoids pulling
# history that is never used.
RUN git clone --depth 1 https://github.com/zerebubuth/openstreetmap-changeset-replication.git /app
WORKDIR /app

# NOTE(review): there is no `apt-get update` before this install, so it
# presumably succeeds only because the base image already ships curl and
# unzip -- confirm before adding any other package here.
RUN apt-get install -y curl unzip

# Install AWS CLI v2, then remove the installer archive and the extracted
# directory so they do not stay behind in /app.
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" \
    && unzip awscliv2.zip \
    && ./aws/install \
    && rm -rf awscliv2.zip aws
# TODO: Install Google Cloud Platform (GCP) and Azure CLI for better data management

# Gems installed at pinned versions for replicate_changesets.rb.
RUN gem install pg -v 0.21.0
RUN gem install libxml-ruby -v 3.1.0

COPY start.sh .
# COPY keeps the file mode from the build context, so set the executable bit
# on start.sh explicitly instead of relying on how it was checked out.
RUN chmod +x replicate_changesets.rb start.sh

# Exec form: start.sh is started directly rather than through `/bin/sh -c`.
CMD ["/app/start.sh"]
82 changes: 82 additions & 0 deletions images/changeset-replication-job/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env bash
# Entrypoint preamble for the changeset replication container: writes
# /config.yaml from the POSTGRES_* variables and makes sure a state.yaml exists
# in the working directory before replication starts.
set -e

workingDirectory="/mnt/changesets"
mkdir -p "$workingDirectory"
CHANGESETS_REPLICATION_FOLDER="replication/changesets"

# Creating config file (one printf argument per line of the file)
printf '%s\n' \
  "state_file: $workingDirectory/state.yaml" \
  "db: host=$POSTGRES_HOST dbname=$POSTGRES_DB user=$POSTGRES_USER password=$POSTGRES_PASSWORD" \
  "data_dir: $workingDirectory/" >/config.yaml

# When there is no local state.yaml, try to fetch it from the configured cloud
# provider; if it is still missing afterwards, start from sequence 0.
local_state="$workingDirectory/state.yaml"
remote_state="$CHANGESETS_REPLICATION_FOLDER/state.yaml"

if [ ! -f "$local_state" ]; then
  echo "File $workingDirectory/state.yaml does not exist in local storage"

  case "$CLOUDPROVIDER" in
  aws)
    if aws s3 ls "$AWS_S3_BUCKET/$remote_state" >/dev/null 2>&1; then
      echo "File exists, downloading from AWS - $AWS_S3_BUCKET"
      aws s3 cp "$AWS_S3_BUCKET/$remote_state" "$local_state"
    fi
    ;;
  gcp)
    if gsutil -q stat "$GCP_STORAGE_BUCKET/$remote_state"; then
      echo "File exists, downloading from GCP - $GCP_STORAGE_BUCKET"
      gsutil cp "$GCP_STORAGE_BUCKET/$remote_state" "$local_state"
    fi
    ;;
  azure)
    blob_exists=$(az storage blob exists --container-name "$AZURE_CONTAINER_NAME" --name "$remote_state" --query "exists" --output tsv)
    if [ "$blob_exists" == "true" ]; then
      echo "File exists, downloading from Azure - $AZURE_CONTAINER_NAME"
      az storage blob download --container-name "$AZURE_CONTAINER_NAME" --name "$remote_state" --file "$local_state"
    fi
    ;;
  esac

  # Nothing was downloaded: create a fresh state file.
  [ -f "$local_state" ] || echo "sequence: 0" >"$local_state"
fi

# Creating the replication files
#
# generateReplication loops forever. Each cycle it:
#   1. runs replicate_changesets.rb against /config.yaml,
#   2. uploads every regular file under $workingDirectory whose status changed
#      since the cycle began to $CHANGESETS_REPLICATION_FOLDER/<relative path>
#      on the provider named by $CLOUDPROVIDER,
#   3. sleeps for 60 seconds.
#
# Reads the globals workingDirectory, CHANGESETS_REPLICATION_FOLDER,
# CLOUDPROVIDER and the provider-specific bucket/container variables.
# Because the script runs under `set -e`, a failing replication or upload
# command terminates the script.
generateReplication() {
  local cycle_start local_file cloud_file

  while true; do
    # Remember when this cycle began. Selecting files relative to this instant
    # (instead of "changed within the last minute") keeps files from being
    # skipped when the replication run or the uploads take longer than a
    # minute. One second of slack covers filesystems that store timestamps
    # with whole-second resolution.
    cycle_start=$(($(date +%s) - 1))

    # Run replication script
    ruby replicate_changesets.rb /config.yaml

    # Loop through the files created or changed during this cycle. Paths are
    # NUL-delimited so they are never word-split, and they are read on fd 3 so
    # the upload commands cannot consume the list through stdin.
    while IFS= read -r -d '' -u 3 local_file; do
      # Construct the cloud path for the file
      cloud_file="$CHANGESETS_REPLICATION_FOLDER/${local_file#"$workingDirectory"/}"

      # Log file transfer
      echo "$(date +%F_%H:%M:%S): Copying file $local_file to $cloud_file"

      # Handle different cloud providers
      case "$CLOUDPROVIDER" in
      "aws")
        aws s3 cp "$local_file" "$AWS_S3_BUCKET/$cloud_file" --acl public-read
        ;;
      "gcp")
        gsutil cp -a public-read "$local_file" "$GCP_STORAGE_BUCKET/$cloud_file"
        ;;
      "azure")
        az storage blob upload \
          --container-name "$AZURE_CONTAINER_NAME" \
          --file "$local_file" \
          --name "$cloud_file" \
          --output none
        ;;
      *)
        echo "Unknown cloud provider: $CLOUDPROVIDER"
        ;;
      esac
    done 3< <(find "$workingDirectory/" -type f -newerct "@$cycle_start" -print0)

    # Sleep for 60 seconds before next iteration
    sleep 60s
  done
}

# Call the function to start the replication process
generateReplication
75 changes: 75 additions & 0 deletions osm-seed/templates/jobs/changeset-replication-job-deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
{{- if .Values.changesetReplicationJob.enabled -}}
# Deployment running a single replica of the changeset-replication-job image.
# Rendered only when changesetReplicationJob.enabled is true.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ .Release.Name }}-changeset-replication-job
  labels:
    app: {{ template "osm-seed.name" . }}
    component: changeset-replication-job
    environment: {{ .Values.environment | quote }}
    release: {{ .Release.Name }}
spec:
  replicas: 1
  # The selector carries component and release in addition to the chart-wide
  # app label so it does not overlap with other controllers that label their
  # pods with the same app value.
  # NOTE: spec.selector is immutable on apps/v1 Deployments; a release that
  # already created this Deployment with the old selector has to delete it
  # before upgrading.
  selector:
    matchLabels:
      app: {{ template "osm-seed.name" . }}
      component: changeset-replication-job
      release: {{ .Release.Name }}
  template:
    metadata:
      labels:
        app: {{ template "osm-seed.name" . }}
        component: changeset-replication-job
        release: {{ .Release.Name }}
    spec:
      containers:
        - name: {{ .Release.Name }}-changeset-replication-job-deployment
          image: {{ .Values.changesetReplicationJob.image.name }}:{{ .Values.changesetReplicationJob.image.tag }}
          # command: ['/start.sh']
          {{- if .Values.changesetReplicationJob.resources.enabled }}
          resources:
            requests:
              memory: {{ .Values.changesetReplicationJob.resources.requests.memory }}
              cpu: {{ .Values.changesetReplicationJob.resources.requests.cpu }}
            limits:
              memory: {{ .Values.changesetReplicationJob.resources.limits.memory }}
              cpu: {{ .Values.changesetReplicationJob.resources.limits.cpu }}
          {{- end }}
          # Env values are quoted so that values which look like numbers or
          # booleans still render as the strings Kubernetes requires.
          env:
            - name: POSTGRES_HOST
              value: {{ .Release.Name }}-db
            - name: POSTGRES_DB
              value: {{ .Values.db.env.POSTGRES_DB | quote }}
            - name: POSTGRES_PASSWORD
              value: {{ quote .Values.db.env.POSTGRES_PASSWORD }}
            - name: POSTGRES_USER
              value: {{ .Values.db.env.POSTGRES_USER | quote }}
            # NOTE(review): the image's start.sh does not appear to read
            # REPLICATION_FOLDER -- confirm whether it is still needed.
            - name: REPLICATION_FOLDER
              value: replication/minute
            - name: CLOUDPROVIDER
              value: {{ .Values.cloudProvider | quote }}
            # In case cloudProvider=aws
            {{- if eq .Values.cloudProvider "aws" }}
            - name: AWS_S3_BUCKET
              value: {{ .Values.AWS_S3_BUCKET | quote }}
            {{- end }}
            # In case cloudProvider=gcp
            {{- if eq .Values.cloudProvider "gcp" }}
            - name: GCP_STORAGE_BUCKET
              value: {{ .Values.GCP_STORAGE_BUCKET | quote }}
            {{- end }}
            # In case cloudProvider=azure
            {{- if eq .Values.cloudProvider "azure" }}
            - name: AZURE_STORAGE_ACCOUNT
              value: {{ .Values.AZURE_STORAGE_ACCOUNT | quote }}
            - name: AZURE_CONTAINER_NAME
              value: {{ .Values.AZURE_CONTAINER_NAME | quote }}
            - name: AZURE_STORAGE_CONNECTION_STRING
              value: {{ .Values.AZURE_STORAGE_CONNECTION_STRING | quote }}
            {{- end }}
            # Memory optimization for osmosis
            # NOTE(review): presumably carried over from the osmosis-based
            # replication job -- confirm this container uses it.
            {{- if .Values.changesetReplicationJob.resources.enabled }}
            - name: MEMORY_JAVACMD_OPTIONS
              value: {{ .Values.changesetReplicationJob.resources.requests.memory | default "2Gi" | quote }}
            {{- end }}
      {{- if .Values.changesetReplicationJob.nodeSelector.enabled }}
      nodeSelector:
        {{ .Values.changesetReplicationJob.nodeSelector.label_key }}: {{ .Values.changesetReplicationJob.nodeSelector.label_value | quote }}
      {{- end }}
{{- end }}
19 changes: 19 additions & 0 deletions osm-seed/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -688,3 +688,22 @@ monitoringReplication:
cpu: '2'
nodeSelector:
enabled: false

# ====================================================================================================
# Variables for changeset-replication-job: settings for the container that
# generates the changeset replication files
# ====================================================================================================
changesetReplicationJob:
  # The Deployment is rendered only when this is true.
  enabled: false
  image:
    name: ''
    tag: ''
  # Requests/limits are applied to the container only when resources.enabled is true.
  resources:
    enabled: false
    requests:
      memory: '20Gi'
      cpu: '8'
    limits:
      memory: '24Gi'
      cpu: '10'
  # When enabled, the Deployment template also reads nodeSelector.label_key
  # and nodeSelector.label_value.
  nodeSelector:
    enabled: false

0 comments on commit bfb658d

Please sign in to comment.