-
Notifications
You must be signed in to change notification settings - Fork 25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
online upgrade with package installation and pending pods #961
Changes from 11 commits
82cf6ac
2c41466
a90a8c2
6111835
a24e665
03bd47d
521a504
5e036bb
f8b3273
cc711b9
9e39dc9
6a5957f
4771f75
4efd782
ba02231
1324be0
dbda6e9
1d62c4b
03ffeb9
87b65e1
0d2c12e
fbacb19
8fad603
a668a0f
031998c
bb882b4
e1106a4
bd814df
ef2164c
2a3f068
e6c8398
60fb071
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,6 +165,8 @@ func (r *OnlineUpgradeReconciler) Reconcile(ctx context.Context, _ *ctrl.Request | |
|
||
// Functions to perform when the image changes. Order matters. | ||
funcs := []func(context.Context) (ctrl.Result, error){ | ||
// Requeue if not all nodes are up | ||
r.checkUpNodes, | ||
// Initiate an upgrade by setting condition and event recording | ||
r.startUpgrade, | ||
r.logEventIfThisUpgradeWasNotChosen, | ||
|
@@ -290,6 +292,30 @@ func (r *OnlineUpgradeReconciler) loadUpgradeState(ctx context.Context) (ctrl.Re | |
return ctrl.Result{}, nil | ||
} | ||
|
||
// checkUpNodes will requeue the upgrade process if not all the main cluster nodes are up. | ||
func (r *OnlineUpgradeReconciler) checkUpNodes(ctx context.Context) (ctrl.Result, error) { | ||
// We skip this if we have already added the new subclusters | ||
if vmeta.GetOnlineUpgradeStepInx(r.VDB.Annotations) > addSubclustersInx { | ||
return ctrl.Result{}, nil | ||
} | ||
|
||
mainPFacts := r.PFacts[vapi.MainCluster] | ||
// All nodes in the main cluster must be up before running online upgrade | ||
if mainPFacts.getUpNodeCount() != len(mainPFacts.Detail) { | ||
r.VRec.Eventf(r.VDB, corev1.EventTypeWarning, events.NotAllNodesUp, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need an event. Just log something. |
||
"Not all nodes (%d/%d) are up, restarting the main cluster. Please check the cluster configuration if this issue continues.", | ||
mainPFacts.getUpNodeCount(), len(mainPFacts.Detail)) | ||
|
||
// try to restart the main cluster, requeuing online upgrade | ||
res, err := r.restartMainCluster(ctx) | ||
if verrors.IsReconcileAborted(res, err) { | ||
return res, err | ||
} | ||
} | ||
|
||
return ctrl.Result{}, nil | ||
} | ||
|
||
// assignSubclustersToReplicaGroupA will go through all of the subclusters involved | ||
// in the upgrade and assign them to the first replica group. The assignment is | ||
// saved in the status.upgradeState.replicaGroups field. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -845,8 +845,17 @@ func (i *UpgradeManager) routeClientTraffic(ctx context.Context, pfacts *PodFact | |
func (i *UpgradeManager) logEventIfRequestedUpgradeIsDifferent(actualUpgrade vapi.UpgradePolicyType) { | ||
if !i.ContinuingUpgrade && i.Vdb.Spec.UpgradePolicy != actualUpgrade && i.Vdb.Spec.UpgradePolicy != vapi.AutoUpgrade { | ||
actualUpgradeAsText := strings.ToLower(string(actualUpgrade)) | ||
|
||
i.Rec.Eventf(i.Vdb, corev1.EventTypeNormal, events.IncompatibleUpgradeRequested, | ||
"Requested upgrade is incompatible with the Vertica deployment. Falling back to %s upgrade.", actualUpgradeAsText) | ||
|
||
if i.Vdb.Spec.UpgradePolicy == "Online" { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's move this before the event recorder. |
||
i.Log.Info("Not all online upgrade prerequisites met. Please make sure: " + | ||
"1. Vertica server version is 24.3.0-2 or higher. " + | ||
"2. Cluster was deployed using `vclusterops`. " + | ||
"3. A license file was applied to allow double the DB nodes. " + | ||
"4. No sandbox defined.") | ||
} | ||
} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ package vadmin | |
|
||
import ( | ||
"context" | ||
"errors" | ||
"regexp" | ||
"strings" | ||
|
||
|
@@ -34,7 +35,8 @@ func (a *Admintools) InstallPackages(ctx context.Context, opts ...installpackage | |
|
||
status := genInstallPackageStatus(stdout) | ||
if err != nil && len(status.Packages) == 0 { | ||
_, logErr := a.logFailure("install_package", events.InstallPackagesFailed, stdout, err) | ||
pkgErr := errors.New(err.Error() + "This may due to lack of memory resources.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a way to distinguish the cause of the error? Simply adding a string here can be misleading for future debugging. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. Now it only logs a message if 0 package was installed. |
||
_, logErr := a.logFailure("install_package", events.InstallPackagesFailed, stdout, pkgErr) | ||
a.Log.Error(err, "failed to finish package installation", "installPackageStatus", *status) | ||
return status, logErr | ||
} | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This new test does too many things already validated by other tests. I think what you want to check is that we do not pass the first function in online upgrade if some pods are pending so make some pod pending, start online upgrade, wait a little and check the upgradeStatus to verify that we are still stuck in that function, make the pending pods run and now wait until we create the new subclusters and the test can end there. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good suggestion. I removed the duplicated checks and renamed the test to online-upgrade-pods-pending. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: ClusterRole | ||
metadata: | ||
name: integration-test-role | ||
--- | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
kind: RoleBinding | ||
metadata: | ||
name: integration-test-rb |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: kuttl.dev/v1beta1 | ||
kind: TestStep | ||
commands: | ||
- script: kubectl apply --namespace $NAMESPACE -f ../../manifests/rbac/base/rbac.yaml | ||
ignoreFailure: true |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: kuttl.dev/v1beta1 | ||
kind: TestStep | ||
commands: | ||
- script: kustomize build ../../manifests/communal-creds/overlay | kubectl apply -f - --namespace $NAMESPACE | ||
- script: kustomize build ../../manifests/priv-container-creds/overlay | kubectl apply -f - --namespace $NAMESPACE | ||
- script: kustomize build ../../manifests/vertica-license/overlay | kubectl apply -f - --namespace $NAMESPACE |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: v1 | ||
kind: Pod | ||
metadata: | ||
namespace: verticadb-operator | ||
labels: | ||
control-plane: verticadb-operator | ||
status: | ||
phase: Running |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-pri1 | ||
status: | ||
currentReplicas: 2 | ||
--- | ||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-pri-2 | ||
status: | ||
currentReplicas: 1 | ||
--- | ||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-sec1 | ||
status: | ||
currentReplicas: 2 | ||
--- | ||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-sec2 | ||
status: | ||
currentReplicas: 1 | ||
--- | ||
apiVersion: vertica.com/v1 | ||
kind: VerticaDB | ||
metadata: | ||
name: v-base-upgrade |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: kuttl.dev/v1beta1 | ||
kind: TestStep | ||
commands: | ||
- command: bash -c "kustomize build setup-vdb/overlay | kubectl -n $NAMESPACE apply -f - " |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-pri1 | ||
status: | ||
currentReplicas: 2 | ||
readyReplicas: 2 | ||
--- | ||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-pri-2 | ||
status: | ||
currentReplicas: 1 | ||
readyReplicas: 1 | ||
--- | ||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-sec1 | ||
status: | ||
currentReplicas: 2 | ||
--- | ||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-sec2 | ||
status: | ||
currentReplicas: 1 | ||
readyReplicas: 1 | ||
--- | ||
apiVersion: vertica.com/v1 | ||
kind: VerticaDB | ||
metadata: | ||
name: v-base-upgrade | ||
status: | ||
subclusters: | ||
- addedToDBCount: 2 | ||
upNodeCount: 2 | ||
- addedToDBCount: 1 | ||
upNodeCount: 1 | ||
- addedToDBCount: 2 | ||
upNodeCount: 2 | ||
- addedToDBCount: 1 | ||
upNodeCount: 1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Intentionally empty to give this step a name in kuttl |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: kuttl.dev/v1beta1 | ||
kind: TestStep | ||
commands: | ||
- command: bash -c "../../../scripts/wait-for-verticadb-steady-state.sh -n verticadb-operator -t 360 $NAMESPACE" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: apps/v1 | ||
kind: StatefulSet | ||
metadata: | ||
name: v-base-upgrade-sec2 | ||
status: | ||
availableReplicas: 0 | ||
replicas: 1 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# (c) Copyright [2021-2024] Open Text. | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# You may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
apiVersion: vertica.com/v1 | ||
kind: VerticaDB | ||
metadata: | ||
name: v-base-upgrade | ||
spec: | ||
subclusters: | ||
- name: sec1 | ||
size: 2 | ||
type: secondary | ||
- name: sec2 | ||
size: 1 | ||
resources: | ||
requests: | ||
memory: 1Ti | ||
type: secondary | ||
- name: pri1 | ||
size: 2 | ||
- name: pri_2 | ||
size: 1 | ||
type: primary |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We want all the pods to be running before starting online upgrade. They don't all have to be up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems not working after changed to
countRunningAndInstalled()
. When the pending nodes just became running (not up yet), the upgrade process continued to duplicate the main cluster but returned VAddnode error. That running node and the new added nodes never came up.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In that case, don't even check the number of upnode. Check if there is any not running pod and requeue if there is. Then Just call the restart reconciler, if any node is down it will start it, if no node is down it will do nothing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the instruction. It is updated accordingly.