Skip to content

Commit

Permalink
New multi run system
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed Oct 18, 2024
1 parent baf5304 commit 3f57be8
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 22 deletions.
30 changes: 30 additions & 0 deletions config/examples/system.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,33 @@ system:
ip: 192.168.11.13
main: false
user: username




multirun:
runs:
# Force batch size to populate the sizing model
- name: "bs{sizer.batch_size}"
matrix:
sizer.auto: 1
sizer.batch_size: [1, 2, 4, 8, 16, 32, 64, 128]
sizer.save: "scaling.yaml"

# Matrix run
- name: "c{sizer.capacity}_m{sizer.multiple}_w{cpu.n_workers}"
matrix:
cpu.auto: 1
cpu.n_workers: [2, 4, 8, 16, 32]
sizer.auto: 1
sizer.capacity: [4Go, 8Go, 16Go, 32Go, 64Go, All]
sizer.multiple: 8
sizer.save: "scaling.yaml"

# Auto run
- name: "auto"
matrix:
cpu.auto: 1
sizer.auto: 1
sizer.multiple: 8
sizer.save: "scaling.yaml"
6 changes: 3 additions & 3 deletions milabench/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""This file is generated, do not modify"""

__tag__ = "v1.0.0_RC1-13-gde92a7e"
__commit__ = "de92a7ea9dea1da24e8105e4566d5e6daef8464c"
__date__ = "2024-10-03 15:48:10 +0000"
__tag__ = "v1.0.0_RC1-17-gbaf5304"
__commit__ = "baf53044e78d0989600359e9496e9aae682bf640"
__date__ = "2024-10-10 16:12:31 +0000"
40 changes: 26 additions & 14 deletions milabench/cli/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from ..report import make_report
from ..sizer import MemoryUsageExtractor
from ..summary import make_summary
from ..system import multirun, apply_system


# fmt: off
Expand Down Expand Up @@ -73,20 +74,8 @@ def _fetch_arch(mp):


@tooled
def cli_run(args=None):
"""Run the benchmarks."""
if args is None:
args = arguments()

layers = validation_names(args.validations)

dash_class = {
"short": ShortDashFormatter,
"long": LongDashFormatter,
"no": None,
}.get(args.dash, None)

mp = get_multipack(run_name=args.run_name)
def run(args, name):
mp = get_multipack(run_name=name)
arch = _fetch_arch(mp)

# Initialize the backend here so we can retrieve GPU stats
Expand Down Expand Up @@ -136,3 +125,26 @@ def cli_run(args=None):
)

return success


@tooled
def cli_run(args=None):
    """Run the benchmarks."""
    # Only build the arguments ourselves when the caller did not supply any.
    if args is None:
        args = arguments()

    # NOTE(review): `layers` and `dash_class` are computed here but not passed
    # to `run`; confirm whether `run` is expected to compute its own.
    layers = validation_names(args.validations)

    dash_class = {
        "short": ShortDashFormatter,
        "long": LongDashFormatter,
        "no": None,
    }.get(args.dash, None)

    # Accumulate the value returned by each individual run.
    success = 0
    for name, conf in multirun():
        # The overrides of this run are only active inside the `with` block.
        with apply_system(conf):
            # mark the run later so we can resume multirun more easily
            # `multirun` yields a `None` name when no multirun is configured;
            # fall back to the run name given on the command line.
            success += run(args, name or args.run_name)

    return success
10 changes: 7 additions & 3 deletions milabench/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,15 @@ def combine_args(args, kwargs):
yield kwargs
else:
key, values = args.popitem()
for value in values:
kwargs[key] = value

try:
for value in values:
kwargs[key] = value
yield from combine_args(deepcopy(args), kwargs)
except:
kwargs[key] = values
yield from combine_args(deepcopy(args), kwargs)


def expand_matrix(name, bench_config):
if "matrix" not in bench_config:
return [(name, bench_config)]
Expand Down
60 changes: 59 additions & 1 deletion milabench/system.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import contextvars
from copy import deepcopy
import ipaddress
import os
import socket
Expand All @@ -15,7 +16,7 @@
from .merge import merge

system_global = contextvars.ContextVar("system", default=None)

multirun_global = contextvars.ContextVar("multirun", default=None)

def get_gpu_capacity(strict=False):
try:
Expand Down Expand Up @@ -79,6 +80,60 @@ def as_environment_variable(name):
return "MILABENCH_" + "_".join(map(str.upper, frags))


def multirun():
    """Yield a ``(run_name, overrides)`` pair for every configured run.

    The configuration is read from ``multirun_global``. When it is unset or
    empty, a single ``(None, {})`` pair is yielded so callers perform exactly
    one run without any override.

    Otherwise each entry of ``runs`` provides a ``name`` template and a
    ``matrix`` of dotted option names. The matrix is expanded with
    ``combine_args`` and the name template is formatted with the expanded
    values (reachable through attribute access, e.g. ``{sizer.batch_size}``)
    plus ``time``, the current unix timestamp in seconds.

    Yields:
        tuple: the formatted run name and a dict mapping dotted option names
        to the values selected for that run.
    """
    from .config import combine_args
    import time
    from types import SimpleNamespace

    config = multirun_global.get()

    # The context variable defaults to None when no system config was built.
    if not config:
        yield None, dict()
        return

    def unflatten(dct):
        """Turn ``{"a.b": 1}`` into ``{"a": SimpleNamespace(b=1)}``."""
        result = {}
        for key, value in dct.items():
            head, *rest = key.split(".")

            # Un-dotted keys are plain entries of the formatting context.
            if not rest:
                result[head] = value
                continue

            node = result.setdefault(head, SimpleNamespace())
            for frag in rest[:-1]:
                if not hasattr(node, frag):
                    setattr(node, frag, SimpleNamespace())
                node = getattr(node, frag)
            setattr(node, rest[-1], value)

        return result

    # `runs:` left empty in the YAML is loaded as None.
    runs = config.get("runs") or []

    for run_matrix in runs:
        template_name = run_matrix["name"]

        # combine_args pops entries from the dict it receives; hand it a copy
        # so the stored configuration survives repeated calls.
        arguments = deepcopy(run_matrix["matrix"])

        for run in combine_args(arguments, dict()):
            ctx = unflatten(run)
            ctx['time'] = int(time.time())
            run_name = template_name.format(**ctx)

            # combine_args reuses a single dict for every combination; yield a
            # copy so each run keeps its own overrides.
            yield run_name, dict(run)


@contextmanager
def apply_system(config: dict):
    """Temporarily apply option overrides to the global system configuration.

    Each key of ``config`` is a dotted path (e.g. ``"sizer.batch_size"``)
    that is written under the ``"options"`` entry of the system dict held by
    ``system_global``. On exit, a snapshot taken before the overrides were
    applied is put back into ``system_global``, even if the body of the
    ``with`` statement raised.

    Args:
        config: mapping of dotted option names to the values to apply.
    """
    system = system_global.get()
    old = deepcopy(system)

    # Overrides need a dict to live in; create one if no system config was
    # built yet. The previous value (None) is still what gets restored.
    if config and system is None:
        system = {}
        system_global.set(system)

    for k, v in config.items():
        frags = k.split(".")

        # Walk (and create as needed) the nested dicts leading to the option.
        lookup = system.setdefault("options", {})
        for f in frags[:-1]:
            lookup = lookup.setdefault(f, {})
        lookup[frags[-1]] = v

    try:
        yield
    finally:
        # Always restore, otherwise a failing run leaks its overrides into
        # the following runs.
        system_global.set(old)


def option(name, etype, default=None):
options = dict()
system = system_global.get()
Expand Down Expand Up @@ -464,6 +519,9 @@ def build_system_config(config_file, defaults=None, gpu=True):
config = merge(defaults, config)

system = config.get("system", {})
multirun = config.get("multirun", {})

multirun_global.set(multirun)
system_global.set(system)

# capacity is only required if batch resizer is enabled
Expand Down
2 changes: 1 addition & 1 deletion scripts/article/run_rocm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ milabench prepare $ARGS

#
# Run the benchmarks
milabench run $ARGS
milabench run $ARGS --system $MILABENCH_WORDIR/system.yaml

#
# Display report
Expand Down
37 changes: 37 additions & 0 deletions tests/test_system_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@





from milabench.system import multirun, build_system_config, enable_offline, option, apply_system, SizerOptions

from milabench.testing import official_config


def test_system_matrix():
    """The example system config expands into the expected number of runs."""
    with enable_offline(True):
        build_system_config(official_config("examples/system"))

        total = 0
        for total, (run_name, overrides) in enumerate(multirun(), start=1):
            print(run_name, overrides)

        assert total == 39


def test_apply_system_matrix():
    """Every override yielded by multirun is readable through ``option``."""
    with enable_offline(True):
        build_system_config(official_config("examples/system"))

        for _run_name, overrides in multirun():
            with apply_system(overrides):

                # While the overrides are applied, each one must be visible
                for key, expected in overrides.items():
                    observed = option(key, lambda x: x)
                    assert observed == expected



if __name__ == "__main__":
test_apply_system_matrix()

0 comments on commit 3f57be8

Please sign in to comment.