From 6874ce844a00ce5b6d045b96ba57997427428710 Mon Sep 17 00:00:00 2001 From: Michael Lueken <63728921+MichaelLueken@users.noreply.github.com> Date: Fri, 9 Feb 2024 15:36:44 -0500 Subject: [PATCH] [develop] Add -n 1 to allow the use of the service partition (#1012) Following the Slurm update on Hera and Jet, the service partition is no longer usable within the SRW App. This change makes the service partition usable again by defining SCHED_NATIVE_CMD_HPSS (-n 1 --export=NONE) in the Hera and Jet machine yaml files, and by setting the native entry of the tasks that use PARTITION_HPSS in the parm/wflow/verify_pre.yaml and parm/wflow/aqm_prep.yaml files to SCHED_NATIVE_CMD_HPSS when it is defined, falling back to SCHED_NATIVE_CMD otherwise. --- parm/wflow/aqm_prep.yaml | 1 + parm/wflow/verify_pre.yaml | 4 ++++ ush/machine/hera.yaml | 1 + ush/machine/jet.yaml | 1 + 4 files changed, 7 insertions(+) diff --git a/parm/wflow/aqm_prep.yaml b/parm/wflow/aqm_prep.yaml index 36d2c8af2a..6cfab161d7 100644 --- a/parm/wflow/aqm_prep.yaml +++ b/parm/wflow/aqm_prep.yaml @@ -23,6 +23,7 @@ default_aqm_task: &default_aqm task_nexus_gfs_sfc: <<: *default_aqm command: '&LOAD_MODULES_RUN_TASK_FP; "nexus_gfs_sfc" "&JOBSdir;/JREGIONAL_NEXUS_GFS_SFC"' + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;' memory: 2G diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index bc1ca11078..eb1a7eb796 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -30,6 +30,7 @@ task_get_obs_ccpa: OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") 
%}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" walltime: 00:45:00 @@ -42,6 +43,7 @@ task_get_obs_nohrsc: OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" walltime: 00:45:00 @@ -55,6 +57,7 @@ task_get_obs_mrms: OBTYPE: 'MRMS' VAR: 'REFC RETOP' FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" walltime: 00:45:00 @@ -68,6 +71,7 @@ task_get_obs_ndas: FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' command: '&LOAD_MODULES_RUN_TASK_FP; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' queue: "&QUEUE_HPSS;" + native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' walltime: 02:00:00 diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 2ad41e36aa..8d751ae891 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -21,6 +21,7 @@ platform: RUN_CMD_NEXUS: srun -n ${nprocs} --export=ALL RUN_CMD_AQMLBC: srun --export=ALL -n ${NUMTS} SCHED_NATIVE_CMD: --export=NONE + SCHED_NATIVE_CMD_HPSS: -n 1 --export=NONE PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' TEST_EXTRN_MDL_SOURCE_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/input_model_data TEST_AQM_INPUT_BASEDIR: 
/scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/aqm_data diff --git a/ush/machine/jet.yaml b/ush/machine/jet.yaml index b5cb19cd0b..93d375ee02 100644 --- a/ush/machine/jet.yaml +++ b/ush/machine/jet.yaml @@ -19,6 +19,7 @@ platform: RUN_CMD_SERIAL: time RUN_CMD_UTILS: srun --export=ALL SCHED_NATIVE_CMD: --export=NONE + SCHED_NATIVE_CMD_HPSS: -n 1 --export=NONE PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }' TEST_EXTRN_MDL_SOURCE_BASEDIR: /mnt/lfs4/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/input_model_data TEST_PREGEN_BASEDIR: /mnt/lfs4/HFIP/hfv3gfs/role.epic/UFS_SRW_data/develop/FV3LAM_pregen