Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[develop] Changes for Derecho, a new platform #894

Merged
merged 16 commits into from
Sep 19, 2023
Merged
3 changes: 3 additions & 0 deletions etc/lmod-setup.csh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ else if ( "$L_MACHINE" == singularity ) then
else if ( "$L_MACHINE" == gaea ) then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.csh

else if ( "$L_MACHINE" == derecho ) then
module reset

else if ( "$L_MACHINE" == odin ) then
module unload modules
unset -f module
Expand Down
5 changes: 4 additions & 1 deletion etc/lmod-setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ Usage: source etc/lmod-setup.sh PLATFORM

OPTIONS:
PLATFORM - name of machine you are building on
(e.g. cheyenne | hera | jet | orion | wcoss2 )
(e.g. cheyenne | hera | jet | orion | wcoss2 )
EOF_USAGE
exit 1
else
Expand Down Expand Up @@ -47,6 +47,9 @@ elif [ "$L_MACHINE" = singularity ]; then
elif [ "$L_MACHINE" = gaea ]; then
source /lustre/f2/dev/role.epic/contrib/Lmod_init.sh

elif [ "$L_MACHINE" = derecho ]; then
module reset

elif [ "$L_MACHINE" = odin ]; then
module unload modules
unset -f module
Expand Down
35 changes: 35 additions & 0 deletions modulefiles/build_derecho_intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
help([[
This module loads libraries for building the UFS SRW App on
the CISL machine Derecho (Cray) using Intel-classic-2023.0.0
]])

-- The one-line description must name this machine (Derecho), not Cheyenne.
whatis([===[Loads libraries needed for building the UFS SRW App on Derecho ]===])

-- Every versioned load below takes its version from the matching *_ver
-- environment variable when it is set, and otherwise uses the default shown.
load(pathJoin("cmake", os.getenv("cmake_ver") or "3.26.3"))
load(pathJoin("ncarenv", os.getenv("ncarenv_ver") or "23.06"))
load(pathJoin("craype", os.getenv("craype_ver") or "2.7.20"))

-- netcdf and hdf5 are unloaded before the compiler and MPI modules are loaded.
-- NOTE(review): presumably to keep system copies from clashing with the
-- hpc-stack builds loaded later -- confirm.
unload("netcdf")
unload("hdf5")
load(pathJoin("intel-classic", os.getenv("intel_classic_ver") or "2023.0.0"))
load(pathJoin("cray-mpich", os.getenv("cray_mpich_ver") or "8.1.25"))

-- Put the hpc-stack module tree first on MODULEPATH so the hpc* modules resolve.
prepend_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/hpc-stack/intel-classic-2023.0.0/modulefiles/stack")
load(pathJoin("hpc", os.getenv("hpc_ver") or "1.2.0"))
load(pathJoin("hpc-intel-classic", os.getenv("hpc_intel_classic_ver") or "2023.0.0"))
load(pathJoin("hpc-cray-mpich", os.getenv("hpc_cray_mpich_ver") or "8.1.25"))

load(pathJoin("ncarcompilers", os.getenv("ncarcompilers_ver") or "1.0.0"))
load(pathJoin("mkl", os.getenv("mkl_ver") or "2023.0.0"))

-- Library set shared by the SRW build modulefiles (defined in another modulefile).
load("srw_common")

-- Compiler driver names exported for the build.
-- NOTE(review): cc/ftn/CC are presumably the Cray PE wrappers from craype -- confirm.
setenv("CC","cc")
setenv("FC","ftn")
setenv("CXX","CC")

-- The same drivers, exported under the CMAKE_* names, plus the platform tag.
setenv("CMAKE_C_COMPILER","cc")
setenv("CMAKE_CXX_COMPILER","CC")
setenv("CMAKE_Fortran_COMPILER","ftn")
setenv("CMAKE_Platform","derecho.intel")

3 changes: 3 additions & 0 deletions modulefiles/tasks/derecho/aqm_ics.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Modules loaded for the aqm_ics task on Derecho.
-- The cmake version comes from $cmake_ver when set; nco is pinned.
local cmake_version = os.getenv("cmake_ver") or "3.22.0"
load(pathJoin("cmake", cmake_version))

for _, module_name in ipairs({ "nco/4.9.5", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
3 changes: 3 additions & 0 deletions modulefiles/tasks/derecho/aqm_lbcs.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Modules loaded for the aqm_lbcs task on Derecho.
-- The cmake version comes from $cmake_ver when set; nco is pinned.
local cmake_version = os.getenv("cmake_ver") or "3.22.0"
load(pathJoin("cmake", cmake_version))

for _, module_name in ipairs({ "nco/4.9.5", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/fire_emission.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Modules loaded for the fire_emission task on Derecho, in this order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Swaps the python module for conda plus miniconda3 and exports the
-- environment name in SRW_ENV.
-- NOTE(review): the file name is not visible here; presumably this is the
-- miniconda_regional_workflow_cmaq modulefile -- confirm.
unload("python")
load("conda")

-- Make the miniconda3 modulefiles findable before loading miniconda3.
local miniconda_module_dir = "/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles"
prepend_path("MODULEPATH", miniconda_module_dir)

-- Version comes from $miniconda3_ver when set.
local miniconda_version = os.getenv("miniconda3_ver") or "4.12.0"
load(pathJoin("miniconda3", miniconda_version))

setenv("SRW_ENV", "regional_workflow_cmaq")
4 changes: 4 additions & 0 deletions modulefiles/tasks/derecho/nexus_emission.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- Modules loaded for the nexus_emission task on Derecho, in this order.
-- nco is pinned to 5.0.6 here.
local task_modules = {
  "nco/5.0.6",
  "ncarenv",
  "miniconda_regional_workflow_cmaq",
}

for _, module_name in ipairs(task_modules) do
  load(module_name)
end
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/nexus_gfs_sfc.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Modules loaded for the nexus_gfs_sfc task on Derecho, in this order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
3 changes: 3 additions & 0 deletions modulefiles/tasks/derecho/nexus_post_split.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Modules loaded for the nexus_post_split task on Derecho.
-- The nco version comes from $nco_ver when set.
local nco_version = os.getenv("nco_ver") or "5.0.6"
load(pathJoin("nco", nco_version))

for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
5 changes: 5 additions & 0 deletions modulefiles/tasks/derecho/plot_allvars.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Python environment for the plot_allvars task on Derecho: drop the python
-- module, load miniconda3, and export the environment name in SRW_ENV.
unload("python")

-- Use the Derecho-specific miniconda3 module tree, matching the other Derecho
-- modulefiles; the path previously lacked the "derecho/" component.
-- NOTE(review): confirm the regional_workflow environment is installed under
-- this Derecho tree.
prepend_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles")

-- Version comes from $miniconda3_ver when set.
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

setenv("SRW_ENV", "regional_workflow")
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/point_source.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Modules loaded for the point_source task on Derecho, in this order.
for _, module_name in ipairs({ "ncarenv", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
2 changes: 2 additions & 0 deletions modulefiles/tasks/derecho/pre_post_stat.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Modules loaded for the pre_post_stat task on Derecho, in this order.
-- nco is pinned to 4.9.5 here.
for _, module_name in ipairs({ "nco/4.9.5", "miniconda_regional_workflow_cmaq" }) do
  load(module_name)
end
5 changes: 5 additions & 0 deletions modulefiles/tasks/derecho/python_srw.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Python environment modulefile for Derecho: drop the python module, load
-- miniconda3, and export the environment name in SRW_ENV.
unload("python")

-- Make the miniconda3 modulefiles findable before loading miniconda3.
local miniconda_module_dir = "/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles"
prepend_path("MODULEPATH", miniconda_module_dir)

-- Version comes from $miniconda3_ver when set.
local miniconda_version = os.getenv("miniconda3_ver") or "4.12.0"
load(pathJoin("miniconda3", miniconda_version))

setenv("SRW_ENV", "workflow_tools")
6 changes: 6 additions & 0 deletions modulefiles/tasks/derecho/run_vx.local.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
-- Modules for the run_vx task on Derecho.
-- Compiler-specific modules are used for the met and metplus libraries.
-- Versions come from $met_ver / $metplus_ver when set.
local met_version = os.getenv("met_ver") or "10.1.2"
load(pathJoin("met", met_version))

local metplus_version = os.getenv("metplus_ver") or "4.1.3"
load(pathJoin("metplus", metplus_version))

load("python_srw")
24 changes: 24 additions & 0 deletions modulefiles/wflow_derecho.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
help([[
This module loads python environment for running the UFS SRW App
on the CISL machine Derecho (Cray)
]])

whatis([===[Loads libraries for running the UFS SRW Workflow on Derecho ]===])

load("ncarenv")

-- Add the rocoto module tree at the end of MODULEPATH, then load rocoto.
append_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/rocoto/modulefiles")
load("rocoto")

-- Drop the python module before miniconda3 is loaded below.
unload("python")

load("set_pythonpath")
-- Put the miniconda3 module tree first on MODULEPATH; the version comes from
-- $miniconda3_ver when set.
prepend_path("MODULEPATH","/glade/work/epicufsrt/contrib/derecho/miniconda3/modulefiles")
load(pathJoin("miniconda3", os.getenv("miniconda3_ver") or "4.12.0"))

-- Print the activation hint only when the module is being loaded.
if mode() == "load" then
LmodMsgRaw([===[Please do the following to activate conda:
> conda activate workflow_tools
]===])
end

1 change: 0 additions & 1 deletion parm/wflow/aqm_post.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ default_aqm_task: &default_aqm
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/aqm_prep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ default_aqm_task: &default_aqm
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
2 changes: 0 additions & 2 deletions parm/wflow/coldstart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ default_task: &default_task
ENSMEM_INDX: '#mem#'
native: '{{ platform.SCHED_NATIVE_CMD }}'
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
queue: '&QUEUE_DEFAULT;'
walltime: 00:30:00
Expand Down Expand Up @@ -149,7 +148,6 @@ metatask_run_ensemble:
SLASH_ENSMEM_SUBDIR: '&SLASH_ENSMEM_SUBDIR;'
nprocs:
join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
nodesize: '&NCORES_PER_NODE;'
nnodes: '{{ task_run_fcst.NNODES_RUN_FCST // 1 }}'
partition: '{% if platform.get("PARTITION_FCST") %}&PARTITION_FCST;{% else %}None{% endif %}'
ppn: '{{ task_run_fcst.PPN_RUN_FCST // 1 }}'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/plot.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ default_task_plot: &default_task
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 24
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/post.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ default_task_post: &default_task
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nnodes: 2
ppn: 24
nodesize: "&NCORES_PER_NODE;"
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
queue: '&QUEUE_DEFAULT;'
walltime: 00:15:00
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/prdgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ metatask_run_prdgen:
join: !cycstr '&LOGDIR;/{{ jobname }}_@Y@m@d@H&LOGEXT;'
native: '{{ platform.SCHED_NATIVE_CMD }}'
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: "&NCORES_PER_NODE;"
nnodes: 1
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 22
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/prep.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ default_task_prep: &default_task
native: '{{ platform.SCHED_NATIVE_CMD }}'
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nnodes: 1
nodesize: "&NCORES_PER_NODE;"
ppn: 24
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/verify_det.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ default_task_verify_det: &default_task_verify_det
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: '&NCORES_PER_NODE;'
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/verify_ens.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ default_task_verify_ens: &default_task_verify_ens
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: '&NCORES_PER_NODE;'
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
1 change: 0 additions & 1 deletion parm/wflow/verify_pre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ default_task_verify_pre: &default_task_verify_pre
native: '{{ platform.SCHED_NATIVE_CMD }}'
nnodes: 1
nodes: '{{ nnodes }}:ppn={{ ppn }}'
nodesize: '&NCORES_PER_NODE;'
partition: '{% if platform.get("PARTITION_DEFAULT") %}&PARTITION_DEFAULT;{% else %}None{% endif %}'
ppn: 1
queue: '&QUEUE_DEFAULT;'
Expand Down
6 changes: 3 additions & 3 deletions tests/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ for the current code in the users ufs-srweather-app directory. It consists of t

Currently, the following configurations are supported:

Machine | Cheyenne | Hera | Jet | Orion | wcoss2 |
------------| ------------|--------|--------|--------|---------|
Compiler(s) | Intel, GNU | Intel | Intel | Intel | Intel |
Machine | Derecho | Cheyenne | Hera | Jet | Orion | wcoss2 |
------------|---------|-------------|--------|--------|--------|---------|
Compiler(s) | Intel | Intel, GNU | Intel | Intel | Intel | Intel |

The CMake build is done in the ``build_${compiler}`` directory.
The executables for each build are installed under the ``bin_${compiler}`` directory.
Expand Down
1 change: 1 addition & 0 deletions tests/WE2E/machine_suites/comprehensive.derecho
9 changes: 9 additions & 0 deletions tests/WE2E/machine_suites/coverage.derecho
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
custom_ESGgrid_IndianOcean_6km
grid_RRFS_CONUS_13km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16_plot
grid_RRFS_CONUS_25km_ics_NAM_lbcs_NAM_suite_GFS_v16
grid_RRFS_CONUScompact_13km_ics_HRRR_lbcs_RAP_suite_HRRR
grid_RRFS_CONUScompact_25km_ics_HRRR_lbcs_RAP_suite_RRFS_v1beta
grid_SUBCONUS_Ind_3km_ics_HRRR_lbcs_HRRR_suite_HRRR
nco_grid_RRFS_CONUS_25km_ics_FV3GFS_lbcs_FV3GFS_timeoffset_suite_GFS_v16
pregen_grid_orog_sfc_climo
specify_template_filenames
2 changes: 1 addition & 1 deletion tests/WE2E/setup_WE2E_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function usage {

}

machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne derecho orion wcoss2 gaea odin singularity macos noaacloud )

if [ "$1" = "-h" ] ; then usage ; fi
[[ $# -le 2 ]] && usage
Expand Down
2 changes: 1 addition & 1 deletion tests/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ function usage() {
exit 1
}

machines=( hera jet cheyenne orion wcoss2 gaea odin singularity macos noaacloud )
machines=( hera jet cheyenne derecho orion wcoss2 gaea odin singularity macos noaacloud )

[[ $# -gt 4 ]] && usage

Expand Down
2 changes: 1 addition & 1 deletion ush/get_crontab_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def get_crontab_contents(called_from_cron, machine, debug):
# themselves being called as cron jobs. In that case, we must instead
# call the system version of crontab at /usr/bin/crontab.
#
if machine == "CHEYENNE":
if machine == "CHEYENNE" or machine == "DERECHO":
if called_from_cron:
crontab_cmd = "/usr/bin/crontab"
else:
Expand Down
43 changes: 43 additions & 0 deletions ush/machine/derecho.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Platform settings for the Derecho machine: rocoto workflow manager,
# pbspro scheduler, 128 cores per node.
platform:
WORKFLOW_MANAGER: rocoto
NCORES_PER_NODE: 128
SCHED: pbspro
# Observation data directories (ccpa, mrms, ndas) and the pregenerated-domain
# base directory, all under the shared UFS_SRW_data/develop tree.
TEST_CCPA_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/ccpa/proc
TEST_MRMS_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/mrms/proc
TEST_NDAS_OBS_DIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/obs_data/ndas/proc
DOMAIN_PREGEN_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/FV3LAM_pregen
# All three queue settings name the same queue.
QUEUE_DEFAULT: main
QUEUE_FCST: main
QUEUE_HPSS: main
# Launch commands. The MPI ones use mpiexec with a process count taken from a
# variable that is not defined in this file; the serial command is just `time`.
RUN_CMD_FCST: mpiexec -n ${PE_MEMBER01}
RUN_CMD_POST: mpiexec -n $nprocs
RUN_CMD_PRDGEN: mpiexec -n $nprocs
RUN_CMD_SERIAL: time
RUN_CMD_UTILS: mpiexec -n $nprocs
RUN_CMD_NEXUS: mpiexec -n $nprocs
RUN_CMD_AQMLBC: mpiexec -n ${NUMTS}
# Brace group that lifts the stack-size limit and then prints all limits.
PRE_TASK_CMDS: '{ ulimit -s unlimited; ulimit -a; }'
TEST_EXTRN_MDL_SOURCE_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data
TEST_AQM_INPUT_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/aqm_data
TEST_PREGEN_BASEDIR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/FV3LAM_pregen
# Both alternate system-directory settings point at the same dummy FV3GFS directory.
TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir
# Template: selects the fcst_ens subtree when global.NUM_ENS_MEMBERS > 0, else
# fcst_det, then appends the literal text /{{workflow.PREDEF_GRID_NAME}}.
# NOTE(review): that literal is presumably expanded in a later rendering pass -- confirm.
TEST_VX_FCST_INPUT_BASEDIR: '{{ "/glade/work/epicufsrt/contrib/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}'
# Fixed-file directories. Note that FIXshp points at NaturalEarth rather than
# at a fix/ subdirectory.
FIXaer: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_aer
FIXgsi: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_gsi
FIXgsm: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_am
FIXlut: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_lut
FIXorg: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_orog
FIXsfc: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/fix/fix_sfc_climo
FIXshp: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/NaturalEarth
EXTRN_MDL_DATA_STORES: aws
# On-disk locations of external model input data, keyed by model name (and by
# file format for FV3GFS). Every path ends in ${yyyymmdd}${hh}, which is not
# defined in this file.
# NOTE(review): presumably substituted per cycle by the consumer -- confirm.
data:
ics_lbcs:
FV3GFS:
nemsio: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/nemsio/${yyyymmdd}${hh}
grib2: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/grib2/${yyyymmdd}${hh}
netcdf: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/FV3GFS/netcdf/${yyyymmdd}${hh}
NAM: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/NAM/${yyyymmdd}${hh}
HRRR: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/HRRR/${yyyymmdd}${hh}
RAP: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/RAP/${yyyymmdd}${hh}
GSMGFS: /glade/work/epicufsrt/contrib/UFS_SRW_data/develop/input_model_data/GSMGFS/${yyyymmdd}${hh}
2 changes: 1 addition & 1 deletion ush/valid_param_vals.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
valid_vals_RUN_ENVIR: ["nco", "community"]
valid_vals_VERBOSE: [True, False]
valid_vals_DEBUG: [True, False]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "JET", "ODIN", "CHEYENNE", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"]
valid_vals_MACHINE: ["HERA", "WCOSS2", "ORION", "JET", "ODIN", "CHEYENNE", "DERECHO", "STAMPEDE", "LINUX", "MACOS", "NOAACLOUD", "SINGULARITY", "GAEA"]
valid_vals_SCHED: ["slurm", "pbspro", "lsf", "lsfcray", "none"]
valid_vals_FCST_MODEL: ["ufs-weather-model"]
valid_vals_WORKFLOW_MANAGER: ["rocoto", "ecflow", "none"]
Expand Down
Loading