Skip to content

Commit

Permalink
Add "iso cluster" for DPU provisioning
Browse files Browse the repository at this point in the history
Add support to deploy a cluster of type iso, which takes an arbitrary
iso file as an additional argument, and provisions a single host with
this ISO.

The ISO can be either a local file or an HTTP URL.

Several assumptions are made about the ISO and the BMC, so this
currently works only for the DPU use case. In the future it can be
generalized to be hardware-agnostic.

Example Config:

clusters:
- name: "iso-cluster"
  api_vip: "0.0.0.0"
  ingress_vip: "0.0.0.0"
  network_api_port: "eno12409"
  kind: "iso"
  install_iso: "http://myhost.redhat.com/rhel.iso"
  masters:
  - name: "acc-239"
    node: "foobar"
    type: "physical"
    bmc: "idrac.hostname.redhat.com"
    bmc_user: "root"
    bmc_password: "calvin"
    ip: "192.168.3.24"
    mac: "10:23:00:03:b5:42"

"network_api_port" should be an interface on the provisioning host
connected to the node in question.

"mac" should be the mac address of the port on the single host connected
to the "network_api_port"

Signed-off-by: Salvatore Daniele <[email protected]>
  • Loading branch information
SalDaniele committed Apr 4, 2024
1 parent 6234be3 commit 7b8c273
Show file tree
Hide file tree
Showing 7 changed files with 355 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cda.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def main_deploy(args: argparse.Namespace) -> None:
cc = ClustersConfig(args.config, args.worker_range)

# microshift does not use assisted installer so we don't need this check
if args.url == cc.cluster_ip_range[0] and not cc.kind == "microshift":
if args.url == cc.cluster_ip_range[0] and cc.kind == "openshift":
ais = AssistedInstallerService(cc.version, args.url, cc.proxy, cc.noproxy)
ais.start()
# workaround, this will still install 4.14, but AI will think
Expand Down
26 changes: 24 additions & 2 deletions clusterDeployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import microshift
from extraConfigRunner import ExtraConfigRunner
from clusterHost import ClusterHost
import isoCluster


def match_to_proper_version_format(version_cluster_config: str) -> str:
Expand Down Expand Up @@ -191,7 +192,7 @@ def deploy(self) -> None:
else:
logger.info("Skipping pre configuration.")

if self._cc.kind != "microshift":
if self._cc.kind == "openshift":
if "masters" in self.steps:
self.teardown()
self.create_cluster()
Expand All @@ -211,7 +212,12 @@ def deploy(self) -> None:
microshift.deploy(self._cc.fullConfig["name"], self._cc.masters[0], self._cc.external_port, version)
else:
logger.error_and_exit("Masters must be of length one for deploying microshift")

if self._cc.kind == "iso":
if len(self._cc.masters) == 1:
self.deploy_cluster_from_iso()
else:
logger.error("Masters must be of length one for deploying from iso")
sys.exit(-1)
if "post" in self.steps:
self._postconfig()
else:
Expand All @@ -223,6 +229,9 @@ def _validate(self) -> None:
if self._cc.masters[0].ip is None:
logger.error_and_exit("Missing ip on master")

if self._cc.kind == "iso":
return

min_cores = 28
cc = int(self._local_host.hostconn.run("nproc").out)
if cc < min_cores:
Expand Down Expand Up @@ -599,3 +608,16 @@ def wait_for_workers(self) -> None:
logger.info(e)

time.sleep(30)

def deploy_cluster_from_iso(self) -> None:
    """Boot the cluster's first master from the configured install ISO.

    Before booting, checks that the master has a MAC address, an IP
    address and a name, and that an explicit (non-"auto") network API
    port is configured.  Each failed check is reported through
    ``logger.error_and_exit``.  Booting itself is delegated to
    ``isoCluster.IPUIsoBoot``.
    """
    cfg = self._cc
    node = cfg.masters[0]

    if node.mac is None:
        logger.error_and_exit(f"No MAC address provided for cluster {cfg.name}, exiting")
    if node.ip is None:
        logger.error_and_exit(f"No IP address provided for cluster {cfg.name}, exiting")
    if node.name is None:
        logger.error_and_exit(f"No name provided for cluster {cfg.name}, exiting")

    # The provisioning-host interface must be named explicitly.
    api_port_missing = not cfg.network_api_port
    if api_port_missing or cfg.network_api_port == "auto":
        logger.error_and_exit(f"Network API port with connection to {cfg.name} must be specified, exiting")

    isoCluster.IPUIsoBoot(cfg, node, cfg.install_iso)
5 changes: 4 additions & 1 deletion clustersConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ class ClustersConfig:
postconfig: List[ExtraConfigArgs] = []
ntp_source: str = "clock.redhat.com"
base_dns_domain: str = "redhat.com"
install_iso: str = ""

# All configurations that used to be supported but are not anymore.
# Used to warn the user to change their config.
Expand Down Expand Up @@ -178,6 +179,8 @@ def __init__(self, yaml_path: str, worker_range: common.RangeList):
self.version = cc["version"]
if "kind" in cc:
self.kind = cc["kind"]
if self.kind == "iso":
self.install_iso = cc["install_iso"]
if "network_api_port" in cc:
self.network_api_port = cc["network_api_port"]
self.name = cc["name"]
Expand Down Expand Up @@ -363,7 +366,7 @@ def local_worker_vms(self) -> List[NodeConfig]:
return [x for x in self.worker_vms() if x.node == "localhost"]

def is_sno(self) -> bool:
    """Return True when the config describes a single-node OpenShift cluster.

    A cluster is SNO only if it has exactly one master, no workers, and
    its ``kind`` is "openshift".  Other kinds with a single master
    (for example "iso") are not SNO.
    """
    return len(self.masters) == 1 and len(self.workers) == 0 and self.kind == "openshift"


def main() -> None:
Expand Down
50 changes: 49 additions & 1 deletion common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
from dataclasses import dataclass
import ipaddress
from typing import List, Optional, Set, Tuple, TypeVar, Iterator
from typing import List, Optional, Set, Tuple, TypeVar, Iterator, Type
from types import TracebackType
import http.server
import socket
from multiprocessing import Process
import host
from logger import logger
import json
import os
import glob
Expand Down Expand Up @@ -69,6 +74,49 @@ class IPRouteAddressEntry:
addr_info: List[IPRouteAddressInfoEntry]


class HttpServerManager:
    """Serve a directory over HTTP from a background process.

    Can be used as a context manager: the server is started on entry and
    terminated on exit.  ``port`` is the first port to try; the port that
    was actually selected is stored in ``self.port`` once
    ``start_server`` has run.
    """

    # Highest valid TCP port; scanning beyond it can never succeed.
    _MAX_PORT = 65535

    def __init__(self, path: str, port: int = 8000):
        """Remember the directory to serve and the preferred starting port."""
        self.path = path
        self.port = port
        self.process: Optional[Process] = None

    def __enter__(self) -> 'HttpServerManager':
        self.start_server()
        return self

    def __exit__(self, exc_type: Optional[Type[BaseException]], exc_value: Optional[BaseException], traceback: Optional[TracebackType]) -> None:
        self.stop_server()

    def start_server(self) -> None:
        """Pick an open port and start serving ``self.path`` in a child process."""

        def target() -> None:
            # Runs in the child process: SimpleHTTPRequestHandler serves the
            # current working directory, so change into the requested path.
            os.chdir(self.path)
            server_address = ('', self.port)
            httpd = http.server.HTTPServer(server_address, http.server.SimpleHTTPRequestHandler)
            httpd.serve_forever()

        # NOTE: the port is only probed here, not reserved; the actual bind
        # happens later inside the child process.
        self.port = self.find_open_port()
        self.process = Process(target=target)
        self.process.start()
        logger.info(f"Http Server started on port {self.port}")

    def stop_server(self) -> None:
        """Terminate the server process if one is running; otherwise do nothing."""
        if self.process:
            self.process.terminate()
            self.process.join()
            # Drop the handle so a repeated stop_server() call is a no-op.
            self.process = None
            logger.info("Http Server stopped")

    def port_is_in_use(self, port: int) -> bool:
        """Return True if a TCP connection to ``localhost:port`` succeeds."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            return s.connect_ex(('localhost', port)) == 0

    def find_open_port(self) -> int:
        """Return the first port at or above ``self.port`` that is not in use.

        Raises:
            RuntimeError: if every port up to 65535 is in use (or the
                starting port is already above that limit).
        """
        port = self.port
        while port <= self._MAX_PORT:
            if not self.port_is_in_use(port):
                return port
            # Report the port that was actually probed, not the starting one.
            logger.debug(f"port {port} in use, trying port {port + 1}")
            port += 1
        raise RuntimeError(f"No open port found between {self.port} and {self._MAX_PORT}")


def ipa(host: host.Host) -> str:
    """Run ``ip -json a`` on *host* and return the command's output."""
    result = host.run("ip -json a")
    return result.out

Expand Down
2 changes: 1 addition & 1 deletion dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ gpgkey=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key
#exclude=kubelet kubeadm kubectl cri-tools kubernetes-cni
EOF

dnf install -y wget rust coreos-installer kubectl libvirt podman qemu-img qemu-kvm virt-install make git golang-bin virt-viewer osbuild-composer composer-cli cockpit-composer bash-completion firewalld lorax
dnf install -y wget rust coreos-installer kubectl libvirt podman qemu-img qemu-kvm virt-install make git golang-bin virt-viewer osbuild-composer composer-cli cockpit-composer bash-completion firewalld lorax dhcp-server

systemctl enable osbuild-composer.socket cockpit.socket --now

Expand Down
10 changes: 4 additions & 6 deletions host.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,8 @@ def from_bmc(ip_or_hostname: str, user: str = "root", password: str = "calvin")
"""

def boot_iso_redfish(self, iso_path: str) -> None:
def boot_iso_redfish(self, iso_path: str, retries: int = 10, retry_delay: int = 60) -> None:
assert ":" in iso_path
retries = 10
for attempt in range(retries):
try:
self.boot_iso_with_retry(iso_path)
Expand All @@ -157,7 +156,7 @@ def boot_iso_redfish(self, iso_path: str) -> None:
if attempt == retries - 1:
raise e
else:
time.sleep(60)
time.sleep(retry_delay)

def boot_iso_with_retry(self, iso_path: str) -> None:
logger.info(iso_path)
Expand Down Expand Up @@ -251,10 +250,9 @@ def ssh_connect_looped(self, logins: List[Login]) -> None:
self._host = e.login()
return
except ssh_exception.AuthenticationException as e:
logger.info(type(e))
raise e
logger.debug(type(e))
except Exception as e:
logger.info(type(e))
logger.debug(type(e))
time.sleep(10)

def _rsa_login(self) -> Optional[KeyLogin]:
Expand Down
Loading

0 comments on commit 7b8c273

Please sign in to comment.