diff --git a/bin/mlad b/bin/mlad index cdfb151a..3d7dd28f 100755 Binary files a/bin/mlad and b/bin/mlad differ diff --git a/charts/api-server-0.4.2.tgz b/charts/api-server-0.4.2.tgz new file mode 100644 index 00000000..6840fcca Binary files /dev/null and b/charts/api-server-0.4.2.tgz differ diff --git a/charts/api-server/Chart.yaml b/charts/api-server/Chart.yaml index 375633d7..6c3a709f 100644 --- a/charts/api-server/Chart.yaml +++ b/charts/api-server/Chart.yaml @@ -20,10 +20,10 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.4.1 # AUTO GENERATED +version: 0.4.2 # AUTO GENERATED # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "0.4.1" # AUTO GENERATED \ No newline at end of file +appVersion: "0.4.2" # AUTO GENERATED \ No newline at end of file diff --git a/charts/api-server/values.yaml b/charts/api-server/values.yaml index 046f1400..f0852fc0 100644 --- a/charts/api-server/values.yaml +++ b/charts/api-server/values.yaml @@ -32,7 +32,7 @@ replicaCount: 1 image: repository: ghcr.io/onetop21/mlappdeploy/api-server pullPolicy: Always - tag: 0.4.1 # AUTO GENERATED + tag: 0.4.2 # AUTO GENERATED port: 8440 diff --git a/charts/index.yaml b/charts/index.yaml index e5eeaf8a..7e0be629 100644 --- a/charts/index.yaml +++ b/charts/index.yaml @@ -1,9 +1,23 @@ apiVersion: v1 entries: api-server: + - apiVersion: v2 + appVersion: 0.4.2 + created: "2022-04-05T14:20:41.620279256Z" + description: Machine Learning Application Deployment Tool by Kubernetes + digest: 399c447d4a0f5b4f1a50ac5386c023017b269457e4cd3785cbecd633dfcb1551 + home: https://github.com/onetop21/MLAppDeploy + maintainers: + - email: onetop21@gmail.com + name: Hyoil LEE + name: api-server + type: application + urls: + - api-server-0.4.2.tgz + version: 0.4.2 - apiVersion: v2 appVersion: 0.4.1 - created: "2022-03-29T14:06:31.101670814Z" + created: "2022-04-05T14:20:41.619402545Z" description: Machine Learning Application Deployment Tool by Kubernetes digest: 6e94b099549555a3f6e21847526f92f4bf0e006f0dbc76ef4b77c9ebb24887c7 home: https://github.com/onetop21/MLAppDeploy @@ -17,7 +31,7 @@ entries: version: 0.4.1 - apiVersion: v2 appVersion: 0.4.0 - created: "2022-03-29T14:06:31.100680708Z" + created: "2022-04-05T14:20:41.618451334Z" description: Machine Learning Application Deployment Tool by Kubernetes digest: 6c0ea30f5d6daa1b9e4c4bd292ec181c5db8356312599a790b767ab7b3c9011d home: https://github.com/onetop21/MLAppDeploy @@ -31,7 +45,7 @@ entries: version: 0.4.0 - apiVersion: v2 appVersion: 0.3.0 - created: "2022-03-29T14:06:31.098627695Z" + created: "2022-04-05T14:20:41.617737725Z" description: Machine Learning Application Deployment Tool by Kubernetes digest: 0c2e62fb188c88ceaa4bace74cdc0bb147e4bd772adae72629cac98a2c354305 home: https://github.com/onetop21/MLAppDeploy @@ -45,7 +59,7 @@ entries: version: 0.3.0 - apiVersion: v2 appVersion: 0.2.0 - created: "2022-03-29T14:06:31.089763041Z" + created: "2022-04-05T14:20:41.610934942Z" description: Machine Learning Application Deployment Tool by Kubernetes digest: 2167f4a29976a3ee29589a153bda98c23e316ef640d02ff30afc6ee991dea5f7 home: https://github.com/onetop21/MLAppDeploy @@ -57,4 +71,4 @@ entries: urls: - api-server-0.2.0.tgz version: 0.2.0 -generated: "2022-03-29T14:06:31.081780192Z" +generated: "2022-04-05T14:20:41.602696141Z" diff --git a/python/mlad/__init__.py b/python/mlad/__init__.py index f0ede3d3..a9873473 100644 --- a/python/mlad/__init__.py +++ b/python/mlad/__init__.py @@ -1 +1 @@ -__version__ = '0.4.1' +__version__ = '0.4.2' diff --git a/python/mlad/api/project.py b/python/mlad/api/project.py index 106c531c..c4dd480f 100644 --- a/python/mlad/api/project.py +++ b/python/mlad/api/project.py @@ -85,4 +85,4 @@ def update(self, project_key, update_yaml, update_specs): 'update_yaml': update_yaml, 'update_specs': update_specs } - return self._post(f'/{project_key}', body=body) + return self._post(f'/{project_key}', body=body, timeout=60) diff --git a/python/mlad/cli/__main__.py b/python/mlad/cli/__main__.py index eb3c6468..543fcb52 100644 --- a/python/mlad/cli/__main__.py +++ b/python/mlad/cli/__main__.py @@ -67,6 +67,7 @@ def main(): main.add_command(image.build, 'build') main.add_command(project.up, 'up') main.add_command(project.down, 'down') + main.add_command(project.run, 'run') main.add_command(project.update, 'update') main.add_command(project.ingress, 'ingress') main.add_command(project.logs, 'logs') diff --git a/python/mlad/cli/board_cli.py b/python/mlad/cli/board_cli.py index 2f151c1d..dc5acd55 100644 --- a/python/mlad/cli/board_cli.py +++ b/python/mlad/cli/board_cli.py @@ -1,5 +1,6 @@ import click +from mlad import __version__ from mlad.cli import board from mlad.cli.autocompletion import list_component_names @@ -8,7 +9,7 @@ @click.command() @click.option('--image-repository', '-i', required=False, - default='ghcr.io/onetop21/mlappdeploy/dashboard:0.4.1', + default=f'ghcr.io/onetop21/mlappdeploy/dashboard:{__version__}', help='The image repository for MLAD board.') @echo_exception def activate(image_repository): diff --git a/python/mlad/cli/install.py b/python/mlad/cli/install.py index 92adbb54..7e184e5d 100644 --- a/python/mlad/cli/install.py +++ b/python/mlad/cli/install.py @@ -1,5 +1,6 @@ from mlad import __version__ from mlad.core import exceptions as core_exceptions +from mlad.api.exceptions import VersionCheckError from mlad.cli.libs import utils from mlad.cli.exceptions import APIServerNotInstalledError from mlad.cli import config as config_core @@ -114,5 +115,10 @@ def check(): if plugin == 'MLAD API Server' and status: yield f' · API Server Address : {api_server_address}' - server_version = API.check.check_version()['version'] + try: + server_version = API.check.check_version()['version'] + except VersionCheckError as e: + yield 'The API server version should be 0.4.1 or higher.' + yield 'Please use command \'helm upgrade mlad -n mlad ./charts/api-server\'.' + raise e yield f' · API Server Version : {server_version}' diff --git a/python/mlad/cli/project.py b/python/mlad/cli/project.py index 28f4687b..b4534f2a 100644 --- a/python/mlad/cli/project.py +++ b/python/mlad/cli/project.py @@ -3,6 +3,7 @@ import json import copy import socket +import time from datetime import datetime from typing import Optional, List, Dict, Tuple, Union @@ -167,7 +168,7 @@ def status(file: Optional[str], project_key: Optional[str], no_trunc: bool, even def logs(file: Optional[str], project_key: Optional[str], - tail: bool, follow: bool, timestamps: bool, filters: Optional[List[str]]): + tail: Union[str, int], follow: bool, timestamps: bool, filters: Optional[List[str]]): utils.process_file(file) if project_key is None: project_key = utils.workspace_key() @@ -458,6 +459,82 @@ def _dump_logs(app_name: str, project_key: str, dirpath: Path): return f'The log file of app [{app_name}] saved.' +def run(file: Optional[str], env: Dict[str, str], quota: Dict[str, str], command: List[str]): + utils.process_file(file) + config = config_core.get() + project = utils.get_project() + origin_project = copy.deepcopy(project) + + base_labels = utils.base_labels( + utils.get_workspace(), + config['session'], + project, + config_core.get_registry_address(config) + ) + project_key = base_labels[MLAD_PROJECT] + try: + API.project.inspect(project_key=project_key) + raise ProjectAlreadyExistError(project_key) + except ProjectNotFound: + pass + + # Find suitable image + image_tag = base_labels[MLAD_PROJECT_IMAGE] + images = [image for image in docker_ctlr.get_images(project_key=project_key) + if image_tag in image.tags] + if len(images) == 0: + raise ImageNotFoundError(image_tag) + + app_spec = { + 'kind': 'Job', + 'name': 'job-1', + 'env': env, + 'quota': quota, + 'command': command + } + check_nvidia_plugin_installed(app_spec) + warning_msg = _check_config_envs(app_spec['name'], app_spec) + if warning_msg: + yield warning_msg + yield 'Deploy job-1 to the cluster...' + try: + credential = docker_ctlr.obtain_credential() + extra_envs = config_core.get_env() + lines = API.project.create(base_labels, origin_project, extra_envs, + credential=credential) + for line in lines: + if 'stream' in line: + sys.stdout.write(line['stream']) + if 'result' in line and line['result'] == 'succeed': + break + + API.app.create(project_key, [app_spec]) + + yield 'Wait for the app runs successfully' + while True: + task_dict = API.app.inspect(project_key, app_spec['name'])['task_dict'] + pod_info = list(task_dict.values())[0] + phase = pod_info['phase'] + reason = pod_info['status'] + if phase == 'Pending': + time.sleep(1) + elif phase == 'Succeeded' or phase == 'Running': + break + else: + yield 'Error occurred in running the job..' + yield f'Reason: {reason}' + break + yield from logs(file, project_key, 'all', True, True, None) + + yield from down(file, project_key, False) + except KeyboardInterrupt as e: + next(API.project.delete(project_key)) + raise e + except Exception as e: + next(API.project.delete(project_key)) + raise e + + def scale(file: Optional[str], project_key: Optional[str], scales: List[Tuple[str, int]]): utils.process_file(file) if project_key is None: @@ -479,11 +556,12 @@ def scale(file: Optional[str], project_key: Optional[str], scales: List[Tuple[st def update(file: Optional[str], project_key: Optional[str]): utils.process_file(file) + config = config_core.get() if project_key is None: project_key = utils.workspace_key() project = API.project.inspect(project_key=project_key) cur_project_yaml = json.loads(project['project_yaml']) - image_tag = project['image'] + cur_image_tag = project['image'] project = utils.get_project() @@ -491,8 +569,19 @@ def update(file: Optional[str], project_key: Optional[str]): if not kind == 'Deployment': raise InvalidProjectKindError('Deployment', 'deploy') + base_labels = utils.base_labels( + utils.get_workspace(), + config['session'], + project, + config_core.get_registry_address(config) + ) + image_tag = base_labels[MLAD_PROJECT_IMAGE] + if cur_image_tag != image_tag: + yield f'Image tag [{cur_image_tag}] and [{image_tag}] are different.' + yield f'The base image will be updated to [{image_tag}].' + default_update_spec = { - 'image': None, + 'image': image_tag, 'command': None, 'args': None, 'scale': 1, @@ -567,7 +656,8 @@ def _check_env(env_key: Union[str, list] = None): yield utils.info_msg(f"Warning: '{name}' env {env_ignored} " 'will be ignored for MLAD preferences.') - if len(diff_keys[name]) > 0: + # Add an update spec if there are any changes in the app spec or image tag + if len(diff_keys[name]) > 0 or image_tag != cur_image_tag: update_specs.append(update_spec) for name, keys in diff_keys.items(): diff --git a/python/mlad/cli/project_cli.py b/python/mlad/cli/project_cli.py index bace2362..339838e1 100644 --- a/python/mlad/cli/project_cli.py +++ b/python/mlad/cli/project_cli.py @@ -1,6 +1,6 @@ import getpass import click -from typing import Optional, List +from typing import Optional, List, Union from mlad.cli import project, config from mlad.cli.libs import utils, MutuallyExclusiveOption from mlad.cli.autocompletion import list_project_keys @@ -68,7 +68,7 @@ def ps(file: Optional[str], project_key: Optional[str], no_trunc: bool, event: b @click.argument('APPS|TASKS', nargs=-1) @echo_exception def logs(file: Optional[str], project_key: Optional[str], - tail: bool, follow: bool, timestamps: bool, **kwargs): + tail: Union[str, int], follow: bool, timestamps: bool, **kwargs): '''Display the project logs deployed on the cluster.''' filters = kwargs.get('apps|tasks') for line in project.logs(file, project_key, tail, follow, timestamps, filters): @@ -132,6 +132,29 @@ def update(file: Optional[str], project_key: Optional[str]): click.echo(line) +@click.command() +@click.option('--file', '-f', default=None, type=click.Path(exists=True), help=( + 'Specify an alternate project file.\t\t\t\n' + f'Same as {utils.PROJECT_FILE_ENV_KEY} in environment variable.') +) +@click.option('--env', '-e', default=[], multiple=True, help=( + 'Associate environment variables of the job\n' + 'Format: [ENV_NAME]=[ENV_VALUE]') +) +@click.option('--quota', '-q', multiple=True, help=( + 'Associate the quota of the job\n' + 'Format: cpu=[CPU_VALUE] gpu=[GPU_VALUE] mem=[MEM_VALUE]' +)) +@click.argument('command', nargs=-1) +@echo_exception +def run(file: Optional[str], env: List[str], quota: List[str], command: List[str]): + '''Create and run a single job imperatively.''' + env_dict = {e.split('=')[0]: e.split('=')[1] for e in env} + quota_dict = {q.split('=')[0]: q.split('=')[1] for q in quota} + for line in project.run(file, env_dict, quota_dict, command): + click.echo(line) + + @click.command() @click.option('--file', '-f', default=None, type=click.Path(exists=True), help=( 'Specify an alternate project file.\t\t\t\n' @@ -171,4 +194,5 @@ def cli(): cli.add_command(up) cli.add_command(down) cli.add_command(update) +cli.add_command(run) cli.add_command(scale)