-
Notifications
You must be signed in to change notification settings - Fork 50
131 lines (112 loc) · 4.78 KB
/
deploy-to-eks.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
name: Create EKS cluster, deploy CKF and run bundle test
on:
workflow_dispatch: # This event allows manual triggering from the Github UI
secrets:
BUNDLE_KUBEFLOW_EKS_AWS_ACCESS_KEY_ID:
required: true
BUNDLE_KUBEFLOW_EKS_AWS_SECRET_ACCESS_KEY:
required: true
uats_branch:
description: 'Branch to run the UATs from e.g. main or track/1.8. By default, the workflow uses main.'
required: false
default: '"1.8"'
schedule:
- cron: "23 0 * * 2"
jobs:
deploy-ckf-to-eks:
runs-on: ubuntu-22.04
env:
UATS_BRANCH: ${{ inputs.uats_branch }}
PYTHON_VERSION: "3.8"
steps:
- name: Checkout repository
uses: actions/checkout@v2
# Remove once https://github.com/canonical/bundle-kubeflow/issues/761
# is resolved and applied to uats repository.
- name: Install python ${{ env.PYTHON_VERSION }}
run: |
sudo add-apt-repository ppa:deadsnakes/ppa -y
sudo apt update -y
sudo apt install python${{ env.PYTHON_VERSION }} python${{ env.PYTHON_VERSION }}-distutils python${{ env.PYTHON_VERSION }}-venv -y
- name: Install CLI tools
run: |
wget https://bootstrap.pypa.io/get-pip.py
python${{ env.PYTHON_VERSION }} get-pip.py
python${{ env.PYTHON_VERSION }} -m pip install tox
sudo snap install charmcraft --classic
curl -LO https://launchpad.net/juju/3.5/3.5.0/+download/juju-3.5.0-linux-amd64.tar.xz
tar xf juju-3.5.0-linux-amd64.tar.xz
sudo install -o root -g root -m 0755 juju /usr/local/bin/juju
juju version
- name: Configure AWS Credentials
env:
AWS_ACCESS_KEY_ID: ${{ secrets.BUNDLE_KUBEFLOW_EKS_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.BUNDLE_KUBEFLOW_EKS_AWS_SECRET_ACCESS_KEY }}
run: |
aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID
aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY
aws configure set default.region eu-central-1
- name: Install kubectl
run: |
sudo snap install kubectl --classic --channel=1.24/stable
mkdir ~/.kube
kubectl version --client
- name: Install eksctl
run: |
sudo apt-get update
sudo apt-get install -y unzip
curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
sudo mv /tmp/eksctl /usr/local/bin
eksctl version
- name: Create cluster
run: |
eksctl create cluster -f .github/cluster.yaml
kubectl get nodes
- name: Setup juju
run: |
juju add-k8s kubeflow --client
juju bootstrap kubeflow kubeflow-controller
juju add-model kubeflow
- name: Test bundle deployment
run: |
tox -vve test_bundle_deployment-1.8 -- --model kubeflow --keep-models -vv -s
- name: Run Kubeflow UATs
run: |
git clone https://github.com/canonical/charmed-kubeflow-uats.git ~/charmed-kubeflow-uats
cd ~/charmed-kubeflow-uats
git checkout ${{ env.UATS_BRANCH }}
tox -e kubeflow-remote
# On failure, capture debugging resources
- name: Save debug artifacts
uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main
if: always()
# On failure, capture debugging resources
- name: Get juju status
run: juju status
if: failure()
- name: Get juju debug logs
run: juju debug-log --replay --no-tail
if: failure()
- name: Get all kubernetes resources
run: kubectl get all -A
if: failure()
- name: Get logs from pods with status = Pending
run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
if: failure()
- name: Get logs from pods with status = Failed
run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
if: failure()
- name: Get logs from pods with status = CrashLoopBackOff
run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
if: failure()
- name: Delete EKS cluster
if: always()
run: |
eksctl delete cluster --region eu-central-1 --name=kubeflow-test
delete-unattached-volumes:
if: always()
uses: ./.github/workflows/delete-aws-volumes.yaml
secrets: inherit
with:
region: eu-central-1
needs: [deploy-ckf-to-eks]