Use Ramble modifier to fill in allocation variables #195

Merged Jun 6, 2024 (70 commits)
Changes from 62 commits
Commits (70)
fe09c79
initial modifier
scheibelp Apr 2, 2024
4b1ad5e
partial work
scheibelp Apr 3, 2024
98eba09
dont mess with locals()
scheibelp Apr 3, 2024
068281e
changed variable name
scheibelp Apr 3, 2024
d33bc90
Able to proceed with Ramble#452; that uncovered a str-to-int conversi…
scheibelp Apr 3, 2024
a5941a0
remove debugging statements
scheibelp Apr 3, 2024
aa0001d
remove filled-in variable from experiment name
scheibelp Apr 3, 2024
12f092f
intermediate work on also getting modifier to generate batch submissions
scheibelp Apr 3, 2024
9642b6f
finished up work that allows the modifier to define allocations as well
scheibelp Apr 4, 2024
4bed118
style fix
scheibelp Apr 4, 2024
c25ad66
refactor away from context manager
scheibelp Apr 4, 2024
10f7d77
handle flux directives and timeout
scheibelp Apr 4, 2024
e63a142
remove unused import
scheibelp Apr 4, 2024
a39dbce
add references for clarification
scheibelp Apr 4, 2024
b8ff318
n_threads is not special; also rename it to n_omp_threads_per_task
scheibelp Apr 4, 2024
67c0c15
Merge branch 'develop' into feature/allocation-modifier
scheibelp Apr 10, 2024
3cdb92d
intermediate work
scheibelp Apr 11, 2024
f43c15e
done with doing placeholder inference based on exceeding max-request …
scheibelp Apr 11, 2024
85c1534
env_var_modification needs mode; not sure 100 percent what that shoul…
scheibelp Apr 12, 2024
18caf43
add n_cores_per_node (different than n_cores_per_rank)
scheibelp Apr 12, 2024
8d4a16d
style edits
scheibelp Apr 12, 2024
327f3f0
there can now be one execute_experiment.tpl
scheibelp Apr 12, 2024
6742b81
removal of all individual execute_experiment.tpl files
scheibelp Apr 12, 2024
1f94ff7
update all system configs except Fugaku and Sierra
scheibelp Apr 12, 2024
2742c4d
update all experiments based on (a) new names and (b) logic that fill…
scheibelp Apr 13, 2024
215ec65
style edit
scheibelp Apr 13, 2024
3534ec5
sierra batch/run cmd options implemented
scheibelp Apr 15, 2024
8e50491
add fugaku batch opt generation logic
scheibelp Apr 15, 2024
2738222
replace variables for Sierra and Fugaku
scheibelp Apr 15, 2024
8d3bd24
consolidate variable accessor logic into single class; add explanator…
scheibelp Apr 15, 2024
f7684c5
syntax error
scheibelp Apr 15, 2024
479b5e2
testing+fixing some issues for fugaku
scheibelp Apr 15, 2024
6fb2e32
typos for sierra
scheibelp Apr 15, 2024
224287d
fix sierra reference errors etc. and recognition of 'queue' as variable
scheibelp Apr 15, 2024
0021951
style fix
scheibelp Apr 15, 2024
1d579f5
apply real values to sys_cpus_per_node/sys_gpus_per_node for LLNL sys…
scheibelp Apr 19, 2024
05dd482
the scheduler used for Sierra is called 'lsf', so use that name
scheibelp Apr 19, 2024
51db624
add basic alias substitution logic (omp_num_threads can be used inste…
scheibelp Apr 19, 2024
cca0288
fix alias issue and add comments
scheibelp Apr 20, 2024
3f60df0
style fix
scheibelp Apr 20, 2024
b4a0f05
set appropriate schedulers
scheibelp Apr 20, 2024
3bac54e
scheduler on Fugaku is called 'pjm'
scheibelp Apr 20, 2024
ba68b83
all experiments need to use the allocation modifier
scheibelp Apr 23, 2024
ed025e7
amg2023 benchmark should not be doing requesting any number of ranks/…
scheibelp Apr 23, 2024
92c614c
logic to set n_ranks based on n_gpus (if the latter is set and the fo…
scheibelp Apr 23, 2024
747e473
handle the most common case of gpu specification for Flux, not using …
scheibelp Apr 23, 2024
0d72a13
add docstring
scheibelp Apr 23, 2024
a63fa5a
syntax error
scheibelp Apr 23, 2024
cc46e80
style fix
scheibelp Apr 23, 2024
d9f0b1b
Fugaku system description
scheibelp Apr 23, 2024
e1ff889
LUMI system description
scheibelp Apr 23, 2024
1b9ccb0
add reference link
scheibelp Apr 23, 2024
6122960
Piz Daint system description
scheibelp Apr 23, 2024
53a081c
add reference link
scheibelp Apr 23, 2024
301ba79
partial description of Eiger/Alps
scheibelp Apr 23, 2024
44fdb95
proper detection of unset vars; fixed error w/ calculation of n_nodes…
scheibelp Apr 23, 2024
7a71927
Both flux and lsf want gpus_per_rank
scheibelp Apr 23, 2024
564aba8
style fix
scheibelp Apr 23, 2024
59ec13d
more style fixes
scheibelp Apr 23, 2024
a7950b7
restore default nosite config
scheibelp Apr 23, 2024
7f4e1c5
missed converting input param name
scheibelp May 14, 2024
ca86837
saxpy/raja-perf cuda/rocm experiments should just specify the number …
scheibelp May 14, 2024
efb2f3c
add CI checks to exercise the allocation modifier logic (use --dry-ru…
scheibelp May 14, 2024
3f4113b
sys_cpus_per_node -> sys_cores_per_node
scheibelp May 15, 2024
30d7aa0
intercept divide-by-zero error
scheibelp May 15, 2024
5b7a521
clarify we currently only support lrun and not jsrun
scheibelp May 15, 2024
14681da
style fix
scheibelp May 15, 2024
13a0afa
Merge branch 'develop' into feature/allocation-modifier
pearce8 May 26, 2024
b4ac027
Merge branch 'develop' into feature/allocation-modifier
pearce8 Jun 5, 2024
ae8ec2d
Merge branch 'develop' into feature/allocation-modifier
pearce8 Jun 5, 2024
4 changes: 4 additions & 0 deletions bin/benchpark
@@ -383,6 +383,10 @@ def benchpark_setup_handler(args):
ramble_spack_experiment_configs_dir,
include_fn,
)
os.symlink(
source_dir / "experiments" / "universal-resources" / "execute_experiment.tpl",
ramble_configs_dir / "execute_experiment.tpl",
)

spack_location = experiments_root / "spack"
ramble_location = experiments_root / "ramble"
19 changes: 11 additions & 8 deletions configs/CSC-LUMI-HPECray-zen3-MI250X-Slingshot/variables.yaml
@@ -6,12 +6,15 @@
variables:
gtl_flag: '' # to be overwritten by tests that need GTL
rocm_arch: 'gfx90a'
batch_time: '02:00'
mpi_command: 'srun -N {n_nodes} -n {n_ranks}'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
cpu_partition: '#SBATCH -p small'
gpu_partition: '#SBATCH -p small-g'
timeout: '120'
scheduler: "slurm"
# This describes the LUMI-G partition: https://docs.lumi-supercomputer.eu/hardware/lumig/
sys_cpus_per_node: "64"
sys_gpus_per_node: "8"
sys_mem_per_node: "512"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
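Every `variables.yaml` in this PR follows the same pattern: concrete `#SBATCH`/`mpi_command` lines are replaced by hardware facts (`sys_cpus_per_node`, `max_request`) plus the sentinel `1000001`, which the allocation modifier later overwrites. A sketch of the kind of inference the modifier performs, assuming a ceil-division rule and the divide-by-zero guard from commit 30d7aa0 (the function name and exact behavior are mine):

```python
import math

PLACEHOLDER = 1000001  # sentinel meaning "not set by the experiment"

def derive_n_nodes(n_ranks: int, sys_cores_per_node: int, max_request: int) -> int:
    """Infer a node count from a rank count and per-node core count (sketch)."""
    if sys_cores_per_node <= 0:
        # commit 30d7aa0: intercept divide-by-zero when the system fact is unset
        raise ValueError("sys_cores_per_node is unset or zero")
    if n_ranks >= PLACEHOLDER:
        raise ValueError("n_ranks was never filled in")
    n_nodes = math.ceil(n_ranks / sys_cores_per_node)
    if n_nodes > max_request:
        raise ValueError(f"{n_nodes} nodes exceeds max_request={max_request}")
    return n_nodes
```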

17 changes: 11 additions & 6 deletions configs/CSCS-Daint-HPECray-haswell-P100-Infiniband/variables.yaml
@@ -4,12 +4,17 @@
# SPDX-License-Identifier: Apache-2.0

variables:
batch_time: '02:00'
mpi_command: 'srun -N {n_nodes} -n {n_ranks}'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
default_cuda_version: '11.2.0'
cuda_arch: '60'
enable_mps: '/usr/tcetmp/bin/enable_mps'
timeout: '120'
scheduler: "slurm"
# This describes the XC50 compute nodes: https://www.cscs.ch/computers/piz-daint
sys_cpus_per_node: "12"
sys_gpus_per_node: "1"
sys_mem_per_node: "64"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
16 changes: 10 additions & 6 deletions configs/CSCS-Eiger-HPECray-zen2-Slingshot/variables.yaml
@@ -4,9 +4,13 @@
# SPDX-License-Identifier: Apache-2.0

variables:
batch_time: '00:30'
mpi_command: 'srun -N {n_nodes} -n {n_ranks}'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
timeout: '30'
scheduler: "slurm"
sys_cpus_per_node: "128"
# sys_gpus_per_node unset
# sys_mem_per_node unset
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
14 changes: 8 additions & 6 deletions configs/LLNL-Magma-Penguin-icelake-OmniPath/variables.yaml
@@ -4,9 +4,11 @@
# SPDX-License-Identifier: Apache-2.0

variables:
batch_time: '02:00'
mpi_command: 'srun -N {n_nodes} -n {n_ranks}'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
timeout: "120"
scheduler: "slurm"
sys_cpus_per_node: "96"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
@@ -7,9 +7,12 @@ variables:
gtl_flag: '' # to be overwritten by tests that need GTL
cuda_arch: '60'
default_cuda_version: '11.8.0'
batch_time: '02:00'
mpi_command: 'srun -N {n_nodes} -n {n_ranks}'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks} -G {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
timeout: "120"
scheduler: "slurm"
sys_cpus_per_node: "36"
sys_gpus_per_node: "2"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
16 changes: 10 additions & 6 deletions configs/LLNL-Sierra-IBM-power9-V100-Infiniband/variables.yaml
@@ -5,11 +5,15 @@

variables:
gtl_flag: '' # to be overwritten by tests that need GTL
batch_time: '02:00'
mpi_command: '/usr/tcetmp/bin/lrun -n {n_ranks} -T {processes_per_node} {gtl_flag}'
batch_submit: 'bsub -q pdebug {execute_experiment}'
batch_nodes: '#BSUB -nnodes {n_nodes}'
batch_ranks: ''
batch_timeout: '#BSUB -W {batch_time}'
default_cuda_version: '11.8.0'
cuda_arch: '70'
timeout: "120"
scheduler: "lsf"
queue: "pdebug"
sys_cpus_per_node: "44"
sys_gpus_per_node: "4"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
15 changes: 9 additions & 6 deletions configs/LLNL-Tioga-HPECray-zen3-MI250X-Slingshot/variables.yaml
@@ -6,9 +6,12 @@
variables:
gtl_flag: '' # to be overwritten by tests that need GTL
rocm_arch: 'gfx90a'
batch_time: '120m'
mpi_command: 'flux run -N {n_nodes} -n {n_ranks}'
batch_submit: 'flux batch {execute_experiment}'
batch_nodes: '# flux: -N {n_nodes}'
batch_ranks: '# flux: -n {n_ranks}'
batch_timeout: '# flux: -t {batch_time}'
timeout: "120"
scheduler: "flux"
sys_cpus_per_node: "64"
sys_gpus_per_node: "4"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
16 changes: 9 additions & 7 deletions configs/RCCS-Fugaku-Fujitsu-A64FX-TofuD/variables.yaml
@@ -4,13 +4,15 @@
# SPDX-License-Identifier: Apache-2.0

variables:
batch_time: '02:00'
mpi_command: 'mpiexec'
batch_submit: 'pjsub {execute_experiment}'
batch_nodes: '#PJM -L "node={n_nodes}"'
batch_ranks: '#PJM --mpi proc={n_ranks}'
batch_timeout: '#PJM -L "elapse={batch_time}:00" -x PJM_LLIO_GFSCACHE="/vol0001:/vol0002:/vol0003:/vol0004:/vol0005:/vol0006"'
default_fj_version: '4.8.1'
default_llvm_version: '15.0.3'
default_gnu_version: '12.2.0'

timeout: "120"
scheduler: "pjm"
sys_cpus_per_node: "48"
sys_mem_per_node: "32"
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
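Across these configs the `scheduler` variable now selects which batch dialect the modifier must emit: `slurm` (`#SBATCH`), `flux` (`# flux:`), `lsf` (`#BSUB`), and `pjm` (`#PJM`) all appear. A sketch of directive generation using formats lifted from the lines this PR removes (the lookup table and function are illustrative, not the modifier's real API; timeout formatting is simplified):

```python
# Directive templates per scheduler, abridged from the removed config lines.
DIRECTIVE_FORMATS = {
    "slurm": ["#SBATCH -N {n_nodes}", "#SBATCH -n {n_ranks}", "#SBATCH -t {timeout}"],
    "flux":  ["# flux: -N {n_nodes}", "# flux: -n {n_ranks}", "# flux: -t {timeout}m"],
    "lsf":   ["#BSUB -nnodes {n_nodes}", "#BSUB -W {timeout}"],
    "pjm":   ['#PJM -L "node={n_nodes}"', "#PJM --mpi proc={n_ranks}"],
    "mpi":   [],  # no batch system: run the experiment directly
}

def batch_directives(scheduler: str, **alloc_vars) -> list:
    """Render a batch-script header from filled-in allocation variables."""
    return [fmt.format(**alloc_vars) for fmt in DIRECTIVE_FORMATS[scheduler]]
```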
15 changes: 9 additions & 6 deletions configs/nosite-AWS_PCluster_Hpc7a-zen4-EFA/variables.yaml
@@ -4,9 +4,12 @@
# SPDX-License-Identifier: Apache-2.0

variables:
batch_time: '02:00'
mpi_command: 'srun -N {n_nodes} -n {n_ranks} --mpi=pmix --export=ALL,FI_EFA_USE_DEVICE_RDMA=1,FI_PROVIDER="efa",OMPI_MCA_mtl_base_verbose=100'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
timeout: "120"
scheduler: "slurm"
sys_cpus_per_node: "1"
# sys_gpus_per_node unset
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
15 changes: 9 additions & 6 deletions configs/nosite-HPECray-zen3-MI250X-Slingshot/variables.yaml
@@ -6,9 +6,12 @@
variables:
gtl_flag: '' # to be overwritten by tests that need GTL
rocm_arch: 'gfx90a'
batch_time: '02:00'
mpi_command: 'srun -N {n_nodes} -n {n_ranks}'
batch_submit: 'sbatch {execute_experiment}'
batch_nodes: '#SBATCH -N {n_nodes}'
batch_ranks: '#SBATCH -n {n_ranks}'
batch_timeout: '#SBATCH -t {batch_time}:00'
timeout: "120"
scheduler: "slurm"
sys_cpus_per_node: "1"
# sys_gpus_per_node unset
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
14 changes: 8 additions & 6 deletions configs/nosite-x86_64/variables.yaml
@@ -4,9 +4,11 @@
# SPDX-License-Identifier: Apache-2.0

variables:
batch_time: ''
mpi_command: 'mpirun -n {n_nodes} -c {n_ranks} --oversubscribe'
batch_submit: '{execute_experiment}'
batch_nodes: ''
batch_ranks: ''
batch_timeout: ''
scheduler: "mpi"
sys_cpus_per_node: "1"
# sys_gpus_per_node unset
max_request: "1000" # n_ranks/n_nodes cannot exceed this
n_ranks: '1000001' # placeholder value
n_nodes: '1000001' # placeholder value
batch_submit: "placeholder"
mpi_command: "placeholder"
9 changes: 5 additions & 4 deletions experiments/amg2023/cuda/ramble.yaml
@@ -15,12 +15,14 @@ ramble:
install: '--add --keep-stage'
concretize: '-U -f'

modifiers:
- name: allocation

applications:
amg2023:
workloads:
problem1:
variables:
n_ranks: '{processes_per_node} * {n_nodes}'
p: 2
px: '{p}'
py: '{p}'
@@ -32,11 +34,10 @@
gtl: ['gtl', 'nogtl']
gtlflag: ['-M"-gpu"', '']
experiments:
amg2023_cuda_problem1_{gtl}_{n_nodes}_{px}_{py}_{pz}_{nx}_{ny}_{nz}:
amg2023_cuda_problem1_{gtl}_{px}_{py}_{pz}_{nx}_{ny}_{nz}:
variables:
env_name: amg2023
processes_per_node: '4'
n_nodes: '2'
n_gpus: '8'
zips:
gtl_info:
- gtl
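The cuda experiment above now specifies only `n_gpus: '8'`; per commit 92c614c, the rank count is derived from the GPU count when the former is unset. A sketch of that rule (signature and ceil-division node derivation are my assumptions):

```python
from typing import Optional, Tuple

def infer_from_gpus(n_gpus: int, n_ranks: Optional[int],
                    sys_gpus_per_node: int) -> Tuple[int, int]:
    """Fill n_ranks from n_gpus and derive n_nodes (illustrative sketch)."""
    if n_ranks is None:
        n_ranks = n_gpus  # commit 92c614c: one rank per GPU when ranks unset
    if sys_gpus_per_node <= 0:
        raise ValueError("no GPUs configured for this system")
    n_nodes = -(-n_gpus // sys_gpus_per_node)  # ceil division
    return n_ranks, n_nodes
```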
13 changes: 0 additions & 13 deletions experiments/amg2023/openmp/execute_experiment.tpl

This file was deleted.

14 changes: 6 additions & 8 deletions experiments/amg2023/openmp/ramble.yaml
@@ -15,15 +15,14 @@ ramble:
install: '--add --keep-stage'
concretize: '-U -f'

modifiers:
- name: allocation

applications:
amg2023:
workloads:
problem1:
env_vars:
set:
OMP_NUM_THREADS: '{omp_num_threads}'
variables:
n_ranks: '{processes_per_node} * {n_nodes}'
p: 2
px: '{p}'
py: '{p}'
@@ -32,18 +31,17 @@
nx: '{n}'
ny: '{n}'
nz: '{n}'
processes_per_node: ['8', '4']
n_ranks_per_node: ['8', '4']
n_nodes: ['1', '2']
threads_per_node_core: ['4', '6', '12']
omp_num_threads: '{threads_per_node_core} * {n_nodes}'
n_threads_per_proc: ['4', '6', '12']
experiments:
amg2023_omp_problem1_{n_nodes}_{omp_num_threads}_{px}_{py}_{pz}_{nx}_{ny}_{nz}:
variables:
env_name: amg2023-omp
matrices:
- size_threads:
- n
- threads_per_node_core
- n_threads_per_proc
spack:
concretized: true
packages:
13 changes: 0 additions & 13 deletions experiments/amg2023/rocm/execute_experiment.tpl

This file was deleted.

9 changes: 5 additions & 4 deletions experiments/amg2023/rocm/ramble.yaml
@@ -15,12 +15,14 @@ ramble:
install: '--add --keep-stage'
concretize: '-U -f'

modifiers:
- name: allocation

applications:
amg2023:
workloads:
problem1:
variables:
n_ranks: '{processes_per_node} * {n_nodes}'
p: 2
px: '{p}'
py: '{p}'
@@ -30,12 +32,11 @@
ny: '{n}'
nz: '{n}'
experiments:
'{env_name}_problem1_{n_nodes}_{px}_{py}_{pz}_{nx}_{ny}_{nz}':
'{env_name}_problem1_{px}_{py}_{pz}_{nx}_{ny}_{nz}':
variables:
gtl: ["gtl", "no-gtl"]
env_name: 'amg2023-gpu-{gtl}'
processes_per_node: ['8', '4']
n_nodes: ['1', '2']
n_gpus: '8'
matrices:
- size_gtl:
- n
16 changes: 0 additions & 16 deletions experiments/gromacs/cuda/execute_experiment.tpl

This file was deleted.
