forked from bn222/cluster-deployment-automation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
extraConfigDpuTenant.py
286 lines (233 loc) · 11.7 KB
/
extraConfigDpuTenant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
from clustersConfig import ClustersConfig
from k8sClient import K8sClient
import host
from common_patches import apply_common_pathches
from concurrent.futures import Future
from extraConfigDpuInfra import run_dpu_network_operator_git
import extraConfigSriov
from typing import Optional
import sys
import jinja2
import json
import os
import re
from logger import logger
from clustersConfig import ExtraConfigArgs
import common
def ExtraConfigDpuTenantMC(cc: ClustersConfig, _: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
[f.result() for (_, f) in futures.items()]
logger.info("Running post config step")
tclient = K8sClient("/root/kubeconfig.tenantcluster")
create_nm_operator(tclient)
apply_common_pathches(tclient)
logger.info("Apply DPU tenant mc")
tclient.oc("create -f manifests/tenant/dputenantmachineconfig.yaml")
logger.info("Waiting for mcp to be updated")
tclient.wait_for_mcp("dpu-host", "dputenantmachineconfig.yaml")
logger.info("Patching mcp setting maxUnavailable to 2")
tclient.oc("patch mcp dpu-host --type=json -p=\\[\\{\"op\":\"replace\",\"path\":\"/spec/maxUnavailable\",\"value\":2\\}\\]")
logger.info("Labeling nodes")
for e in cc.workers:
cmd = f"label node {e.name} node-role.kubernetes.io/dpu-host="
logger.info(tclient.oc(cmd))
logger.info("Need to deploy sriov network operator")
def render_sriov_node_policy(policyname: str, bf_port: str, bf_addr: str, numvfs: int, resourcename: str, outfilename: str) -> None:
with open("./manifests/tenant/SriovNetworkNodePolicy.yaml.j2") as f:
j2_template = jinja2.Template(f.read())
rendered = j2_template.render(policyName=policyname, bf_port=bf_port, bf_addr=bf_addr, numVfs=numvfs, resourceName=resourcename)
logger.info(rendered)
with open(outfilename, "w") as outFile:
outFile.write(rendered)
def render_envoverrides_cm(client: K8sClient, mapping: Optional[list[dict[str, str]]], ns: str) -> str:
assert mapping is not None
contents = open("manifests/tenant/envoverrides.yaml").read()
contents += f"{ns}\n"
contents += "data:\n"
for e in mapping:
a: dict[str, str] = {}
a["TENANT_K8S_NODE"] = e["worker"]
# Can be removed since API is replaced https://github.com/openshift/dpu-network-operator/pull/67
dpu_ip = client.get_ip(e["bf"])
if isinstance(dpu_ip, str):
a["DPU_IP"] = dpu_ip
else:
logger.error(f"Failed to retrieve ip for {e['bf']}")
sys.exit(1)
a["MGMT_IFNAME"] = "c1pf0vf0"
contents += f" {e['bf']}: |\n"
for k, v in a.items():
contents += f" {k}={v}\n"
open(f"/tmp/envoverrides-{ns}.yaml", "w").write(contents)
return f"/tmp/envoverrides-{ns}.yaml"
def _ExtraConfigDpuTenant_common(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]], *, new_api: bool = False) -> None:
[f.result() for (_, f) in futures.items()]
logger.info("Running post config step")
if not new_api:
if cfg.mapping is None:
logger.error("BF to Worker mapping not provided for tenant deployment")
sys.exit(1)
tclient = K8sClient("/root/kubeconfig.tenantcluster")
logger.info("Waiting for mcp dpu-host to become ready")
tclient.wait_for_mcp("dpu-host")
if new_api:
lh = host.LocalHost()
run_dpu_network_operator_git(lh, "/root/kubeconfig.tenantcluster")
logger.info("Creating namespace for tenant")
tclient.oc("create -f manifests/infra/ns.yaml")
logger.info("Creating DpuClusterConfig cr")
tclient.oc("create -f manifests/tenant/dpuclusterconfig.yaml")
else:
# Since the kubeconfig we pass into secrets will have the role "system:admin", this is not allowed
# from the local node running operator pods point of view. A possible fix is to somehow download
# the kubeconfig from the tenant host nodes and use that as the secret. For now, let's disable the
# webhook
logger.info("creating config map to disable ovn-k node_admission webhook on tenant cluster")
tclient.oc("create configmap -n openshift-network-operator network-node-identity --from-literal=enabled=false")
first_worker = cc.workers[0].name
ip = tclient.get_ip(first_worker)
if ip is None:
sys.exit(-1)
rh = host.RemoteHost(ip)
rh.ssh_connect("core")
all_bf = [x for x in rh.run("lspci").out.split("\n") if "BlueField" in x]
if not all_bf:
logger.info(f"Couldn't find BF on {first_worker}")
sys.exit(-1)
bf = all_bf[0].split(" ")[0]
logger.info(f"BF is at {bf}")
bf_port = None
for port in common.ip_links(rh):
ret = rh.run(f'ethtool -i {port.ifname}')
if ret.returncode != 0:
continue
d = {}
for part in ret.out.strip().split("\n"):
key, value = part.split(":", 1)
d[key] = value
if d["bus-info"].endswith(bf):
bf_port = port.ifname
logger.info(bf_port)
if bf_port is None:
logger.info("Couldn't find bf port")
sys.exit(-1)
numVfs = 16
numMgmtVfs = 1
workloadPolicyName = "policy-mlnx-bf"
workloadResourceName = "mlnx_bf"
workloadBfPort = f"{bf_port}#{numMgmtVfs}-{numVfs-1}"
workloadPolicyFile = "/tmp/" + workloadPolicyName + ".yaml"
mgmtPolicyName = "mgmt-policy-mlnx-bf"
mgmtResourceName = "mgmtvf"
mgmtBfPort = f"{bf_port}#0-{numMgmtVfs-1}"
mgmtPolicyFile = "/tmp/" + mgmtPolicyName + ".yaml"
render_sriov_node_policy(workloadPolicyName, workloadBfPort, bf, numVfs, workloadResourceName, workloadPolicyFile)
render_sriov_node_policy(mgmtPolicyName, mgmtBfPort, bf, numVfs, mgmtResourceName, mgmtPolicyFile)
logger.info("Creating sriov pool config")
tclient.oc("create -f manifests/tenant/sriov-pool-config.yaml")
tclient.oc("create -f " + workloadPolicyFile)
tclient.oc("create -f " + mgmtPolicyFile)
logger.info("Waiting for mcp to be updated")
tclient.wait_for_mcp("dpu-host", "sriov pool")
if new_api:
# DELTA START: We don't create sriovdpuconfigmap.yaml to set dpu-host mode. https://github.com/openshift/cluster-network-operator/pull/1676
mgmtPortResourceName = "openshift.io/" + mgmtResourceName
logger.info(f"Creating Config Map for mgmt port resource name {mgmtPortResourceName}")
with open('./manifests/tenant/hardware-offload-config.yaml.j2') as f:
j2_template = jinja2.Template(f.read())
rendered = j2_template.render(mgmtPortResourceName=mgmtPortResourceName)
logger.info(rendered)
with open("/tmp/hardware-offload-config.yaml", "w") as outFile:
outFile.write(rendered)
logger.info(tclient.oc("create -f /tmp/hardware-offload-config.yaml"))
# DELTA END
else:
logger.info("creating config map to put ovn-k into dpu host mode")
tclient.oc("create -f manifests/tenant/sriovdpuconfigmap.yaml")
logger.info("setting ovn kube node env-override to set management port")
logger.info(os.getcwd())
contents = open("manifests/tenant/setenvovnkube.yaml").read()
assert cfg.mapping is not None
for bfmap in cfg.mapping:
a: dict[str, str] = {}
mp = re.sub(r'np\d$', '', bf_port)
a["OVNKUBE_NODE_MGMT_PORT_NETDEV"] = f"{mp}v0"
contents += f" {bfmap['worker']}: |\n"
for k, v in a.items():
contents += f" {k}={v}\n"
open("/tmp/1.yaml", "w").write(contents)
logger.info("Running create")
logger.info(tclient.oc("create -f /tmp/1.yaml"))
for e in cc.workers:
cmd = f"label node {e.name} network.operator.openshift.io/dpu-host="
logger.info(tclient.oc(cmd))
ip = tclient.get_ip(e.name)
if ip is None:
logger.error(f"Failed to get ip for node {e.name}")
sys.exit(-1)
rh = host.RemoteHost(ip)
rh.ssh_connect("core")
if new_api:
# DELTA: "ovn-k8s-mp0" port is deleted by OVN-K. https://github.com/ovn-org/ovn-kubernetes/pull/3571
result = rh.run("sudo ovs-vsctl show")
if "ovn-k8s-mp0" in result.out:
logger.info(result.out)
logger.info("Unexpected ovn-k8s-mp0 interface found in br-int.")
# FIXME: The above patch did not seem to entirely work. We will need to investigate further
# For now we will delete the port.
# workaround for https://issues.redhat.com/browse/NHE-335
logger.info(rh.run("sudo ovs-vsctl del-port br-int ovn-k8s-mp0"))
logger.info("creating mc to disable ovs")
if not new_api:
# At this point we error out, because the patch ports on the DPU OvS side does not get created
# patch ports are created on the OVN side. TODO: FIXME For now try this to make OVN-K on DPU happy:
for e in cc.workers:
cmd = f"annotate --overwrite node/{e.name} k8s.ovn.org/node-mgmt-port={{\"PfId\":0,\"FuncId\":0}}"
logger.info(cmd)
logger.info(tclient.oc(cmd))
tclient.oc("create -f manifests/tenant/disable-ovs.yaml")
logger.info("Waiting for mcp")
tclient.wait_for_mcp("dpu-host", "disable-ovs.yaml")
logger.info("Final infrastructure cluster configuration")
iclient = K8sClient("/root/kubeconfig.infracluster")
# https://issues.redhat.com/browse/NHE-334
if new_api:
logger.info(iclient.oc(f"create secret generic tenant-cluster-1-kubeconf --from-file=config={tclient._kc} --namespace=two-cluster-design"))
else:
iclient.oc("project two-cluster-design")
logger.info(iclient.oc(f"create secret generic tenant-cluster-1-kubeconf --from-file=config={tclient._kc}"))
tc_namespace = "two-cluster-design"
dpu_namespace = "openshift-dpu-network-operator"
file = render_envoverrides_cm(iclient, cfg.mapping, tc_namespace)
logger.info(iclient.oc(f"create -f {file}"))
file = render_envoverrides_cm(iclient, cfg.mapping, dpu_namespace)
logger.info(iclient.oc(f"create -f {file}"))
# Restart DPU network operator to apply env-overrides cm
restart_dpu_network_operator(iclient)
patch = json.dumps({"spec": {"kubeConfigFile": "tenant-cluster-1-kubeconf"}})
r = iclient.oc(f"patch --type merge -p '{patch}' DpuClusterConfig dpuclusterconfig-sample -n two-cluster-design")
if not new_api:
logger.info(patch)
logger.info(r)
logger.info("Creating network attachement definition")
tclient.oc("create -f manifests/tenant/nad.yaml")
tclient.approve_csr()
iclient.approve_csr()
extraConfigSriov.ensure_pci_realloc(cc, tclient, "dpu-host")
def ExtraConfigDpuTenant(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
_ExtraConfigDpuTenant_common(cc, cfg, futures, new_api=False)
def ExtraConfigDpuTenant_NewAPI(cc: ClustersConfig, cfg: ExtraConfigArgs, futures: dict[str, Future[Optional[host.Result]]]) -> None:
_ExtraConfigDpuTenant_common(cc, cfg, futures, new_api=True)
def create_nm_operator(client: K8sClient) -> None:
logger.info("Apply NMO subscription")
client.oc("create -f manifests/tenant/nmo-subscription.yaml")
def restart_dpu_network_operator(iclient: K8sClient) -> None:
lh = host.LocalHost()
logger.info("Restarting dpu-network-operator")
run_dpu_network_operator_git(lh, "/root/kubeconfig.infracluster")
iclient.oc("wait deploy/dpu-network-operator-controller-manager --for condition=available -n openshift-dpu-network-operator")
logger.info("Creating DpuClusterConfig cr")
iclient.oc("create -f manifests/infra/dpuclusterconfig.yaml")
def main() -> None:
pass
if __name__ == "__main__":
main()