diff --git a/.github/workflows/test-software.eessi.io.yml b/.github/workflows/test-software.eessi.io.yml index d4d980901f..6f592cf4c4 100644 --- a/.github/workflows/test-software.eessi.io.yml +++ b/.github/workflows/test-software.eessi.io.yml @@ -7,6 +7,12 @@ on: workflow_dispatch: permissions: contents: read # to fetch code (actions/checkout) +env: + EESSI_ACCELERATOR_TARGETS: | + x86_64/amd/zen2: + - nvidia/cc80 + x86_64/amd/zen3: + - nvidia/cc80 jobs: check_missing: runs-on: ubuntu-latest @@ -21,6 +27,7 @@ jobs: - aarch64/neoverse_v1 - x86_64/amd/zen2 - x86_64/amd/zen3 + - x86_64/amd/zen4 - x86_64/intel/haswell - x86_64/intel/skylake_avx512 - x86_64/generic @@ -48,14 +55,43 @@ jobs: export EESSI_PREFIX=/cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}} export EESSI_OS_TYPE=linux env | grep ^EESSI | sort + + # first check the CPU-only builds for this CPU target echo "just run check_missing_installations.sh (should use easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)" for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + if [ ${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} = "x86_64/amd/zen4" ]; then + if grep -q 2022b <<<"${easystack_file}"; then + # skip the check of installed software on zen4 for foss/2022b builds + continue + elif grep -q CUDA <<<"${easystack_file}"; then + # skip the check of install CUDA software in the CPU path for zen4 + continue + fi + fi echo "check missing installations for ${easystack_file}..." ./check_missing_installations.sh ${easystack_file} ec=$? if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi done + # now check the accelerator builds for this CPU target + accelerators=$(echo "${EESSI_ACCELERATOR_TARGETS}" | yq ".${EESSI_SOFTWARE_SUBDIR_OVERRIDE}[]") + if [ -z ${accelerators} ]; then + echo "no accelerator targets defined for ${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" + else + for accel in ${accelerators}; do + module use ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all + echo "checking missing installations for accelerator ${accel} using modulepath: ${MODULEPATH}" + for easystack_file in $(ls easystacks/software.eessi.io/${{matrix.EESSI_VERSION}}/accel/$(dirname ${accel})/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do + echo "check missing installations for ${easystack_file}..." + ./check_missing_installations.sh ${easystack_file} + ec=$? + if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi + done + module unuse ${EESSI_SOFTWARE_PATH}/accel/${accel}/modules/all + done + fi + - name: Test check_missing_installations.sh with missing package (GCC/8.3.0) run: | export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}} diff --git a/.github/workflows/tests_archdetect_nvidia_gpu.yml b/.github/workflows/tests_archdetect_nvidia_gpu.yml new file mode 100644 index 0000000000..8ad5f4fb36 --- /dev/null +++ b/.github/workflows/tests_archdetect_nvidia_gpu.yml @@ -0,0 +1,124 @@ +# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions +name: Tests for accelerator detection (NVIDIA GPU) +on: + push: + pull_request: +permissions: + contents: read # to fetch code (actions/checkout) +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + fake_nvidia_smi_script: + - none # no nvidia-smi command + - no_devices # nvidia-smi command works, but no GPUs available + - 1xa100 # cc80, supported with (atleast) zen2 CPU + - 2xa100 # cc80, supported with (atleast) zen2 CPU + - 4xa100 # cc80, supported with (atleast) zen2 CPU + - cc01 # non-existing GPU + fail-fast: false + steps: + - name: checkout + uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 + + # we deliberately do not use the eessi/github-action-eessi action, + # because we want to control when the EESSI environment is initialized + - name: Mount EESSI CernVM-FS repository + uses: cvmfs-contrib/github-action-cvmfs@55899ca74cf78ab874bdf47f5a804e47c198743c # v4.0 + with: + cvmfs_config_package: https://github.com/EESSI/filesystem-layer/releases/download/latest/cvmfs-config-eessi_latest_all.deb + cvmfs_http_proxy: DIRECT + cvmfs_repositories: software.eessi.io + + - name: test accelerator detection + run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/amd/zen2' + + # put fake nvidia-smi command in place (unless we don't want to) + if [[ "${{matrix.fake_nvidia_smi_script}}" != "none" ]]; then + tmpdir=$(mktemp -d) + ln -s $PWD/tests/archdetect/nvidia-smi/${{matrix.fake_nvidia_smi_script}}.sh $tmpdir/nvidia-smi + export PATH=$tmpdir:$PATH + fi + + # first run with debugging enabled, just to show the output + ./init/eessi_archdetect.sh -d accelpath || echo "non-zero exit code: $?" + + # verify output (or exit code if non-zero) + out=$(./init/eessi_archdetect.sh accelpath || echo "non-zero exit code: $?") + + if [[ $out == "$( cat ./tests/archdetect/nvidia-smi/${{matrix.fake_nvidia_smi_script}}.output )" ]]; then + + echo "Test for '${{matrix.fake_nvidia_smi_script}}' PASSED: '$out'" + + # run full EESSI init script, which pick up on the accelerator (if available) + echo + . init/bash 2>&1 | tee init.out + echo "-----------------------------------------------------------------------------" + + if [[ "${{matrix.fake_nvidia_smi_script}}" == "none" ]] || [[ "${{matrix.fake_nvidia_smi_script}}" == "no_devices" ]]; then + + pattern="archdetect could not detect any accelerators" + echo ">>> checking for pattern '${pattern}' in init output..." + grep "${pattern}" init.out || (echo "FAILED 1" || exit 1) + + pattern="archdetect found supported accelerator" + echo ">>> checking for lack of pattern '${pattern}' in init output..." + match=$(grep "${pattern}" init.out || true) + test "x${match}" = "x" || (echo "unexpected match found for '${pattern}' in init output" && exit 1) + + pattern="Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/.*/accel/.*/modules/all to \$MODULEPATH" + echo ">>> checking for lack of pattern '${pattern}' in init output..." + match=$(grep "${pattern}" init.out || true) + test "x${match}" = "x" || (echo "unexpected match found for '${pattern}' in init output" && exit 1) + + elif [[ "${{matrix.fake_nvidia_smi_script}}" == "cc01" ]]; then + + pattern="No matching path found in x86_64/amd/zen2 for accelerator detected by archdetect (accel/nvidia/cc01)" + echo ">>> checking for pattern '${pattern}' in init output..." + grep "${pattern}" init.out || (echo "FAILED 1" || exit 1) + + pattern="Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/.*/accel/.*/modules/all to \$MODULEPATH" + echo ">>> checking for lack of pattern '${pattern}' in init output..." + match=$(grep "${pattern}" init.out || true) + test "x${match}" = "x" || (echo "unexpected match found for '${pattern}' in init output" && exit 1) + + else + echo ">>> checking for 'accel/nvidia/cc80' in init output..." + grep "archdetect found supported accelerator for CPU target x86_64/amd/zen2: accel/nvidia/cc80" init.out || (echo "FAILED 2" && exit 1) + grep "Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/accel/nvidia/cc80/modules/all to \$MODULEPATH" init.out || (echo "FAILED 3" && exit 1) + fi + + echo ">>> checking last line of init output..." + tail -1 init.out | grep "Environment set up to use EESSI (2023.06), have fun!" || (echo "FAILED, full init utput:" && cat init.out && exit 1) + + echo "All checks on init output PASSED" + else + echo "Test for '${{matrix.fake_nvidia_smi_script}}' FAILED: '$out'" >&2 + exit 1 + fi + + - name: test accelerator detection under $EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE + $EESSI_ACCELERATOR_TARGET_OVERRIDE + run: | + export EESSI_SOFTWARE_SUBDIR_OVERRIDE='x86_64/amd/zen2' + export EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE='x86_64/amd/zen3' + export EESSI_ACCELERATOR_TARGET_OVERRIDE='accel/nvidia/cc80' + + # first run with debugging enabled, just to show the output + ./init/eessi_archdetect.sh -d accelpath || echo "non-zero exit code: $?" + + # verify output (or exit code if non-zero) + out=$(./init/eessi_archdetect.sh accelpath || echo "non-zero exit code: $?") + + echo + . init/bash 2>&1 | tee init.out + echo "-----------------------------------------------------------------------------" + + echo ">>> checking for 'accel/nvidia/cc80' in init output..." + grep "archdetect found supported accelerator for CPU target x86_64/amd/zen3: accel/nvidia/cc80" init.out || (echo "FAILED 1" && exit 1) + grep "Using x86_64/amd/zen2 as software subdirectory" init.out || (echo "FAILED 2" && exit 1) + grep "Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/modules/all to \$MODULEPATH" init.out || (echo "FAILED 3" && exit 1) + grep "Prepending /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen3/accel/nvidia/cc80/modules/all to \$MODULEPATH" init.out || (echo "FAILED 4" && exit 1) + + echo "All checks on init output PASSED" diff --git a/.github/workflows/tests_eessi_module.yml b/.github/workflows/tests_eessi_module.yml index d2e3cd1338..cbcffe6385 100644 --- a/.github/workflows/tests_eessi_module.yml +++ b/.github/workflows/tests_eessi_module.yml @@ -72,7 +72,7 @@ jobs: module load EESSI/${{matrix.EESSI_VERSION}} env | grep -E '^(EESSI_S|EESSI_C)' | sort > "${moduleoutfile}" module unload EESSI/${{matrix.EESSI_VERSION}} - source /cvmfs/software.eessi.io/versions/${{matrix.EESSI_VERSION}}/init/bash + source ./init/bash env | grep -E '^(EESSI_S|EESSI_C)' | sort > "${sourceoutfile}" cat "${moduleoutfile}" cat "${sourceoutfile}" @@ -80,6 +80,7 @@ jobs: echo "Test for checking env variables PASSED" else echo "Test for checking env variables FAILED" >&2 + diff "${moduleoutfile}" "${sourceoutfile}" exit 1 fi diff --git a/EESSI-extend-2023.06-easybuild.eb b/EESSI-extend-2023.06-easybuild.eb index b525ee462d..8e328c3ece 100644 --- a/EESSI-extend-2023.06-easybuild.eb +++ b/EESSI-extend-2023.06-easybuild.eb @@ -92,7 +92,7 @@ elseif (os.getenv("EESSI_SITE_INSTALL") ~= nil) then if ((os.getenv("EESSI_PROJECT_INSTALL") ~= nil) or (os.getenv("EESSI_USER_INSTALL") ~= nil)) then LmodError("You cannot use EESSI_SITE_INSTALL in combination with any other EESSI_*_INSTALL environment variables") end - easybuild_installpath = string.gsub(os.getenv("EESSI_SOFTWARE_PATH"), 'versions', 'host_injections') + easybuild_installpath = os.getenv("EESSI_SITE_SOFTWARE_PATH") else -- Deal with user and project installs project_install = os.getenv("EESSI_PROJECT_INSTALL") @@ -166,7 +166,7 @@ elseif (project_modulepath ~= nil) then end -- Make sure EasyBuild itself is loaded if not ( isloaded("EasyBuild") ) then - load("EasyBuild") + load(latest("EasyBuild")) end """ diff --git a/bot/check-test.sh b/bot/check-test.sh index 3b16e5c415..2731e75464 100755 --- a/bot/check-test.sh +++ b/bot/check-test.sh @@ -23,7 +23,6 @@ else [[ ${VERBOSE} -ne 0 ]] && echo " Slurm output file '"${job_out}"' NOT found" fi - # ReFrame prints e.g. #[----------] start processing checks #[ RUN ] GROMACS_EESSI %benchmark_info=HECBioSim/Crambin %nb_impl=cpu %scale=2_nodes %module_name=GROMACS/2021.3-foss-2021a /d597cff4 @snellius:rome+default @@ -76,8 +75,42 @@ fi if [[ ! -z ${grep_reframe_failed} ]]; then grep_reframe_result=${grep_reframe_failed} else - grep_reframe_result=${grep_reframe_success} + # Grep the entire output of ReFrame, so that we can report it in the foldable section of the test report + GP_success_full='(?s)\[----------\] start processing checks.*?\[==========\] Finished on [a-zA-Z0-9 ]*' + # Grab the full ReFrame report, than cut the irrelevant parts + # Note that the character limit for messages in github is around 65k, so cutting is important + grep_reframe_success_full=$( \ + grep -v "^>> searching for " ${job_dir}/${job_out} | \ + # Use -z + grep -Pzo "${GP_success_full}" | \ + # Replace null character with newline, to undo the -z option + sed 's/\x00/\n/g' | \ + # Remove the [ RUN ] lines from reframe, they are not very informative + grep -v -P '\[\s*RUN\s*]' | \ + # Remove the line '[----------] all spawned checks have finished' + grep -v '\[-*\]' | \ + # Remove the line '[==========] Finished on Mon Oct 7 21' + grep -v '\[=*\]' | \ + # Remove blank line(s) from the report + grep -v '^$' | \ + # Remove warnings about the local spawner not supporting memory requests + grep -v 'WARNING\: hooks\.req_memory_per_node does not support the scheduler you configured .local.*$' | \ + # Strip color coding characters + sed 's/\x1B\[[0-9;]*m//g' | \ + # Replace all newline characters with
+ sed ':a;N;$!ba;s/\n//g' | \ + # Replace % with %%. Use \%\% to interpret both %% as (non-special) characters + sed 's/\%/\%\%/g' \ + ) + # TODO (optional): we could impose a character limit here, and truncate if too long + # (though we should do that before inserting the
statements). + # If we do, we should probably re-append the final summary, e.g. + # [ PASSED ] Ran 10/10 test case(s) from 10 check(s) (0 failure(s), 0 skipped, 0 aborted) + # so that that is always displayed + # However, that's not implemented yet - let's see if this ever even becomes an issue + grep_reframe_result=${grep_reframe_success_full} fi +echo "grep_reframe_result: ${grep_reframe_result}" echo "[TEST]" > ${job_test_result_file} if [[ ${SLURM_OUTPUT_FOUND} -eq 0 ]]; then diff --git a/create_lmodrc.py b/create_lmodrc.py index 28ad2a1915..1720b762f0 100755 --- a/create_lmodrc.py +++ b/create_lmodrc.py @@ -33,6 +33,12 @@ def error(msg): error("Prefix directory %s does not exist!" % prefix) lmodrc_path = os.path.join(prefix, DOT_LMOD, 'lmodrc.lua') +# Lmod itself doesn't care about the accelerator subdir so remove this duplication from +# the target path (if it exists) +accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") +if accel_subdir: + lmodrc_path = lmodrc_path.replace("/accel/%s" % accel_subdir, '') + lmodrc_txt = TEMPLATE_LMOD_RC % { 'dot_lmod': DOT_LMOD, 'prefix': prefix, diff --git a/create_lmodsitepackage.py b/create_lmodsitepackage.py index 62f073c9a6..11ca614be5 100755 --- a/create_lmodsitepackage.py +++ b/create_lmodsitepackage.py @@ -8,7 +8,7 @@ DOT_LMOD = '.lmod' -hook_txt ="""require("strict") +hook_txt = """require("strict") local hook = require("Hook") local open = io.open @@ -36,7 +36,7 @@ -- eessi_prefix_host_injections is the prefix with site-extensions (i.e. additional modules) -- to the official EESSI modules, e.g. /cvmfs/software.eessi.io/host_injections/2023.06 local eessi_prefix_host_injections = string.gsub(eessi_prefix, 'versions', 'host_injections') - + -- Check if the full modulepath starts with the eessi_prefix_* return string.find(t.fn, "^" .. eessi_prefix) ~= nil or string.find(t.fn, "^" .. eessi_prefix_host_injections) ~= nil end @@ -103,7 +103,7 @@ if isFile(archSitePackage) then dofile(archSitePackage) end - + end @@ -111,7 +111,7 @@ local frameStk = require("FrameStk"):singleton() local mt = frameStk:mt() local simpleName = string.match(t.modFullName, "(.-)/") - -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. + -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" @@ -207,6 +207,7 @@ load_site_specific_hooks() """ + def error(msg): sys.stderr.write("ERROR: %s\n" % msg) sys.exit(1) @@ -221,12 +222,18 @@ def error(msg): error("Prefix directory %s does not exist!" % prefix) sitepackage_path = os.path.join(prefix, DOT_LMOD, 'SitePackage.lua') + +# Lmod itself doesn't care about compute capability so remove this duplication from +# the install path (if it exists) +accel_subdir = os.getenv("EESSI_ACCELERATOR_TARGET") +if accel_subdir: + sitepackage_path = sitepackage_path.replace("/accel/%s" % accel_subdir, '') try: os.makedirs(os.path.dirname(sitepackage_path), exist_ok=True) with open(sitepackage_path, 'w') as fp: fp.write(hook_txt) # Make sure that the created Lmod file has "read/write" for the user/group and "read" permissions for others - os.chmod(sitepackage_path, S_IREAD|S_IWRITE|S_IRGRP|S_IWGRP|S_IROTH) + os.chmod(sitepackage_path, S_IREAD | S_IWRITE | S_IRGRP | S_IWGRP | S_IROTH) except (IOError, OSError) as err: error("Failed to create %s: %s" % (sitepackage_path, err)) diff --git a/create_tarball.sh b/create_tarball.sh index 9c212681a5..01f498e1ac 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -89,13 +89,17 @@ for subdir in ${cpu_arch_subdir} ${cpu_arch_subdir}/accel/${accel_subdir}; do done # add a bit debug output -echo "wrote file list to ${files_list}" -[ -r ${files_list} ] && cat ${files_list} -echo "wrote module file list to ${module_files_list}" -[ -r ${module_files_list} ] && cat ${module_files_list} +if [ -r ${files_list} ]; then + echo "wrote file list to ${files_list}" + cat ${files_list} +fi +if [ -r ${module_files_list} ]; then + echo "wrote module file list to ${module_files_list}" + cat ${module_files_list} -# Copy the module files list to current workindg dir for later use in the test step -cp ${module_files_list} ${current_workdir}/module_files.list.txt + # Copy the module files list to current workindg dir for later use in the test step + cp ${module_files_list} ${current_workdir}/module_files.list.txt +fi topdir=${cvmfs_repo}/versions/ diff --git a/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023b.yml b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023b.yml new file mode 100644 index 0000000000..11523e7f03 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/a64fx/eessi-2023.06-eb-4.9.4-2023b.yml @@ -0,0 +1,2 @@ +easyconfigs: + - SciPy-bundle-2023.11-gfbf-2023b.eb diff --git a/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.3-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.3-2023a-CUDA.yml new file mode 100644 index 0000000000..8935a3f3c3 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/accel/nvidia/eessi-2023.06-eb-4.9.3-2023a-CUDA.yml @@ -0,0 +1,6 @@ +easyconfigs: + - LAMMPS-2Aug2023_update2-foss-2023a-kokkos-CUDA-12.1.1.eb + - ESPResSo-4.2.2-foss-2023a-CUDA-12.1.1.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21440 + from-commit: 5525968921d7b5eae54f7d16391201e17ffae13c diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a-CUDA.yml new file mode 100644 index 0000000000..f8bde420de --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a-CUDA.yml @@ -0,0 +1,9 @@ +easyconfigs: + - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: + # use easyconfig that only install subset of CUDA samples, + # to circumvent problem with nvcc linking to glibc of host OS, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; + # and where additional samples are excluded because they fail to build on aarch64, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; + options: + from-pr: 19451 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml index 7244219dc3..43b081b122 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml @@ -35,14 +35,6 @@ easyconfigs: - Boost-1.82.0-GCC-12.3.0.eb - netCDF-4.9.2-gompi-2023a.eb - FFmpeg-6.0-GCCcore-12.3.0.eb - - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: - # use easyconfig that only install subset of CUDA samples, - # to circumvent problem with nvcc linking to glibc of host OS, - # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; - # and where additional samples are excluded because they fail to build on aarch64, - # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; - options: - from-pr: 19451 - ALL-0.9.2-foss-2023a.eb: options: from-pr: 19455 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a-CUDA.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a-CUDA.yml new file mode 100644 index 0000000000..cccbfa6808 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a-CUDA.yml @@ -0,0 +1,2 @@ +easyconfigs: + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml index 4b58cb6106..3f6590c3cd 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.0-2023a.yml @@ -34,7 +34,6 @@ easyconfigs: # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19996 from-pr: 19996 - dask-2023.9.2-foss-2023a.eb - - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb - JupyterNotebook-7.0.2-GCCcore-12.3.0.eb - ImageMagick-7.1.1-15-GCCcore-12.3.0.eb: options: diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml index 5f63a7bc14..969b0d469b 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.2-2022b.yml @@ -32,3 +32,21 @@ easyconfigs: - KaHIP-3.14-gompi-2022b.eb - MPC-1.3.1-GCCcore-12.2.0.eb - MUMPS-5.6.1-foss-2022b-metis.eb + - GL2PS-1.4.2-GCCcore-12.2.0.eb + - GST-plugins-base-1.22.1-GCC-12.2.0.eb + - wxWidgets-3.2.2.1-GCC-12.2.0.eb + - Archive-Zip-1.68-GCCcore-12.2.0.eb + - jemalloc-5.3.0-GCCcore-12.2.0.eb + - Judy-1.0.5-GCCcore-12.2.0.eb + - libaio-0.3.113-GCCcore-12.2.0.eb + - Z3-4.12.2-GCCcore-12.2.0.eb + - tbb-2021.10.0-GCCcore-12.2.0.eb + - dask-2023.7.1-foss-2022b.eb + - netcdf4-python-1.6.3-foss-2022b.eb + - Ruby-3.2.2-GCCcore-12.2.0.eb + - ROOT-6.26.10-foss-2022b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21526 + from-commit: 6cbfbd7d7a55dc7243f46d0beea510278f4718df + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3467 + include-easyblocks-from-commit: c3aebe1f133d064a228c5d6c282e898b83d74601 diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml index 0a4a4e5554..0c863f0025 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023a.yml @@ -3,3 +3,6 @@ easyconfigs: - GDB-13.2-GCCcore-12.3.0.eb - tmux-3.3a-GCCcore-12.3.0.eb - Vim-9.1.0004-GCCcore-12.3.0.eb + - gmsh-4.12.2-foss-2023a.eb + - basemap-1.3.9-foss-2023a.eb + - geopandas-0.14.2-foss-2023a.eb \ No newline at end of file diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023b.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023b.yml new file mode 100644 index 0000000000..5325f2e553 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.3-2023b.yml @@ -0,0 +1,9 @@ +easyconfigs: + - LAMMPS-29Aug2024-foss-2023b-kokkos.eb: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21436 + options: + from-commit: 9dc24e57880a8adb06ae10557c5315e66671a533 + - GROMACS-2024.3-foss-2023b.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21430 + from-commit: 8b509882d03402e2998ff9b22c154a6957e36d6b diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml new file mode 100644 index 0000000000..170a639064 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.9.4-2023a.yml @@ -0,0 +1,11 @@ +easyconfigs: + - ROOT-6.30.06-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21526 + from-commit: 6cbfbd7d7a55dc7243f46d0beea510278f4718df + # see https://github.com/easybuilders/easybuild-easyblocks/pull/3467 + include-easyblocks-from-commit: c3aebe1f133d064a228c5d6c282e898b83d74601 + - waLBerla-6.1-foss-2023a.eb: + options: + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/21600 + from-commit: 9b12318bcff1749781d9eb71c23e21bc3a79ed01 diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-osu-microbenchmarks-in-accel-prefix.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-osu-microbenchmarks-in-accel-prefix.yml new file mode 100644 index 0000000000..23801e0250 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240919-eb-4.9.3-osu-microbenchmarks-in-accel-prefix.yml @@ -0,0 +1,5 @@ +# 2024.09.19 +# We need to reinstall OSU-Micro-Benchmarks in the accelerator prefixes +# See https://github.com/EESSI/software-layer/pull/716 +easyconfigs: + - OSU-Micro-Benchmarks-7.2-gompi-2023a-CUDA-12.1.1.eb diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 0000000000..9cd1b451cd --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20240925-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,6 @@ +# 2024.09.25 +# EESSI-extend did not support LMOD_EXACT_MATCH +# (see https://github.com/EESSI/software-layer/pull/747) +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb + diff --git a/easystacks/software.eessi.io/2023.06/rebuilds/20241008-eb-4.9.4-EESSI-extend.yml b/easystacks/software.eessi.io/2023.06/rebuilds/20241008-eb-4.9.4-EESSI-extend.yml new file mode 100644 index 0000000000..5491ef8427 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/rebuilds/20241008-eb-4.9.4-EESSI-extend.yml @@ -0,0 +1,5 @@ +# 2024.10.08 +# EESSI-extend should use EESSI_SITE_INSTALLPATH, instead of recalculating this +easyconfigs: + - EESSI-extend-2023.06-easybuild.eb + diff --git a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml index 1e30631e57..25337649ce 100644 --- a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml +++ b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-001-system.yml @@ -1,2 +1,4 @@ easyconfigs: - Nextflow-23.10.0.eb + - EasyBuild-4.8.2.eb + - EasyBuild-4.9.0.eb diff --git a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml index 4bae944d45..519d7701dc 100644 --- a/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/zen4/eessi-2023.06-eb-4.9.3-2023a.yml @@ -11,3 +11,9 @@ easyconfigs: - LoopTools-2.15-GCC-12.3.0.eb - ncdu-1.18-GCC-12.3.0.eb - WhatsHap-2.2-foss-2023a.eb + - PyOpenGL-3.1.7-GCCcore-12.3.0.eb + - SAMtools-1.18-GCC-12.3.0.eb + - CDO-2.2.2-gompi-2023a.eb + - OSU-Micro-Benchmarks-7.1-1-gompi-2023a.eb + - BWA-0.7.17-20220923-GCCcore-12.3.0.eb + - Valgrind-3.21.0-gompi-2023a.eb diff --git a/eb_hooks.py b/eb_hooks.py index 9b0e9c8dcb..79bdeeee0d 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -190,7 +190,9 @@ def parse_hook_casacore_disable_vectorize(ec, eprefix): ): cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if cpu_target == CPU_TARGET_NEOVERSE_V1: - if not hasattr(ec, 'toolchainopts'): + # Make sure the toolchainopts key exists, and the value is a dict, + # before we add the option to disable vectorization + if 'toolchainopts' not in ec or ec['toolchainopts'] is None: ec['toolchainopts'] = {} ec['toolchainopts']['vectorize'] = False print_msg("Changed toochainopts for %s: %s", ec.name, ec['toolchainopts']) @@ -301,19 +303,36 @@ def parse_hook_ucx_eprefix(ec, eprefix): raise EasyBuildError("UCX-specific hook triggered for non-UCX easyconfig?!") -def parse_hook_lammps_remove_deps_for_CI_aarch64(ec, *args, **kwargs): +def parse_hook_freeimage_aarch64(ec, *args, **kwargs): """ - Remove x86_64 specific dependencies for the CI to pass on aarch64 + Make sure to build with -fPIC on ARM to avoid + https://github.com/EESSI/software-layer/pull/736#issuecomment-2373261889 """ - if ec.name == 'LAMMPS' and ec.version in ('2Aug2023_update2',): + if ec.name == 'FreeImage' and ec.version in ('3.18.0',): if os.getenv('EESSI_CPU_FAMILY') == 'aarch64': - # ScaFaCoS and tbb are not compatible with aarch64/* CPU targets, - # so remove them as dependencies for LAMMPS (they're optional); - # see also https://github.com/easybuilders/easybuild-easyconfigs/pull/19164 + - # https://github.com/easybuilders/easybuild-easyconfigs/pull/19000; - # we need this hook because we check for missing installations for all CPU targets - # on an x86_64 VM in GitHub Actions (so condition based on ARCH in LAMMPS easyconfig is always true) - ec['dependencies'] = [dep for dep in ec['dependencies'] if dep[0] not in ('ScaFaCoS', 'tbb')] + # Make sure the toolchainopts key exists, and the value is a dict, + # before we add the option to enable PIC and disable PNG_ARM_NEON_OPT + if 'toolchainopts' not in ec or ec['toolchainopts'] is None: + ec['toolchainopts'] = {} + ec['toolchainopts']['pic'] = True + ec['toolchainopts']['extra_cflags'] = '-DPNG_ARM_NEON_OPT=0' + print_msg("Changed toolchainopts for %s: %s", ec.name, ec['toolchainopts']) + + +def parse_hook_lammps_remove_deps_for_aarch64(ec, *args, **kwargs): + """ + Remove x86_64 specific dependencies for the CI and missing installations to pass on aarch64 + """ + if ec.name == 'LAMMPS': + if ec.version in ('2Aug2023_update2', '29Aug2024'): + if os.getenv('EESSI_CPU_FAMILY') == 'aarch64': + # ScaFaCoS and tbb are not compatible with aarch64/* CPU targets, + # so remove them as dependencies for LAMMPS (they're optional); + # see also https://github.com/easybuilders/easybuild-easyconfigs/pull/19164 + + # https://github.com/easybuilders/easybuild-easyconfigs/pull/19000; + # we need this hook because we check for missing installations for all CPU targets + # on an x86_64 VM in GitHub Actions (so condition based on ARCH in LAMMPS easyconfig is always true) + ec['dependencies'] = [dep for dep in ec['dependencies'] if dep[0] not in ('ScaFaCoS', 'tbb',)] else: raise EasyBuildError("LAMMPS-specific hook triggered for non-LAMMPS easyconfig?!") @@ -537,7 +556,7 @@ def pre_configure_hook_LAMMPS_zen4(self, *args, **kwargs): cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR') if self.name == 'LAMMPS': - if self.version == '2Aug2023_update2': + if self.version in ('2Aug2023_update2', '29Aug2024'): if get_cpu_architecture() == X86_64: if cpu_target == CPU_TARGET_ZEN4: # There is no support for ZEN4 in LAMMPS yet so falling back to ZEN3 @@ -802,8 +821,9 @@ def inject_gpu_property(ec): 'casacore': parse_hook_casacore_disable_vectorize, 'CGAL': parse_hook_cgal_toolchainopts_precise, 'fontconfig': parse_hook_fontconfig_add_fonts, + 'FreeImage': parse_hook_freeimage_aarch64, 'grpcio': parse_hook_grpcio_zlib, - 'LAMMPS': parse_hook_lammps_remove_deps_for_CI_aarch64, + 'LAMMPS': parse_hook_lammps_remove_deps_for_aarch64, 'CP2K': parse_hook_CP2K_remove_deps_for_aarch64, 'OpenBLAS': parse_hook_openblas_relax_lapack_tests_num_errors, 'pybind11': parse_hook_pybind11_replace_catch2, diff --git a/init/bash b/init/bash index 4ad09f6a1b..928ac6efdf 100644 --- a/init/bash +++ b/init/bash @@ -29,6 +29,11 @@ if [ $? -eq 0 ]; then show_msg "Prepending site path $EESSI_SITE_MODULEPATH to \$MODULEPATH..." module use $EESSI_SITE_MODULEPATH + if [ ! -z ${EESSI_MODULEPATH_ACCEL} ]; then + show_msg "Prepending $EESSI_MODULEPATH_ACCEL to \$MODULEPATH..." + module use $EESSI_MODULEPATH_ACCEL + fi + #show_msg "" #show_msg "*** Known problems in the ${EESSI_VERSION} software stack ***" #show_msg "" diff --git a/init/eessi_archdetect.sh b/init/eessi_archdetect.sh index ad6dce6f9a..2b1534ce62 100755 --- a/init/eessi_archdetect.sh +++ b/init/eessi_archdetect.sh @@ -17,7 +17,7 @@ else exit 1 fi -VERSION="1.1.0" +VERSION="1.2.0" # default log level: only emit warnings or errors LOG_LEVEL="WARN" @@ -150,8 +150,45 @@ cpupath(){ fi } +accelpath() { + # If EESSI_ACCELERATOR_TARGET_OVERRIDE is set, use it + log "DEBUG" "accelpath: Override variable set as '$EESSI_ACCELERATOR_TARGET_OVERRIDE' " + if [ ! -z $EESSI_ACCELERATOR_TARGET_OVERRIDE ]; then + if [[ "$EESSI_ACCELERATOR_TARGET_OVERRIDE" =~ ^accel/nvidia/cc[0-9][0-9]$ ]]; then + echo ${EESSI_ACCELERATOR_TARGET_OVERRIDE} + return 0 + else + log "ERROR" "Value of \$EESSI_ACCELERATOR_TARGET_OVERRIDE should match 'accel/nvidia/cc[0-9[0-9]', but it does not: '$EESSI_ACCELERATOR_TARGET_OVERRIDE'" + fi + return 0 + fi + + # check for NVIDIA GPUs via nvidia-smi command + nvidia_smi=$(command -v nvidia-smi) + if [[ $? -eq 0 ]]; then + log "DEBUG" "accelpath: nvidia-smi command found @ ${nvidia_smi}" + nvidia_smi_out=$(mktemp -p /tmp nvidia_smi_out.XXXXX) + nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader 2>&1 > $nvidia_smi_out + if [[ $? -eq 0 ]]; then + nvidia_smi_info=$(head -1 $nvidia_smi_out) + cuda_cc=$(echo $nvidia_smi_info | sed 's/, /,/g' | cut -f4 -d, | sed 's/\.//g') + log "DEBUG" "accelpath: CUDA compute capability '${cuda_cc}' derived from nvidia-smi output '${nvidia_smi_info}'" + res="accel/nvidia/cc${cuda_cc}" + log "DEBUG" "accelpath: result: ${res}" + echo $res + rm -f $nvidia_smi_out + else + log "DEBUG" "accelpath: nvidia-smi command failed, see output in $nvidia_smi_out" + exit 3 + fi + else + log "DEBUG" "accelpath: nvidia-smi command not found" + exit 2 + fi +} + # Parse command line arguments -USAGE="Usage: eessi_archdetect.sh [-h][-d][-a] " +USAGE="Usage: eessi_archdetect.sh [-h][-d][-a] " while getopts 'hdva' OPTION; do case "$OPTION" in @@ -168,5 +205,6 @@ ARGUMENT=${1:-none} case "$ARGUMENT" in "cpupath") cpupath; exit;; - *) echo "$USAGE"; log "ERROR" "Missing argument (possible actions: 'cpupath')";; + "accelpath") accelpath; exit;; + *) echo "$USAGE"; log "ERROR" "Missing argument (possible actions: 'cpupath', 'accelpath')";; esac diff --git a/init/eessi_environment_variables b/init/eessi_environment_variables index 8c10b1fca8..d2daf40ace 100644 --- a/init/eessi_environment_variables +++ b/init/eessi_environment_variables @@ -38,6 +38,45 @@ if [ -d $EESSI_PREFIX ]; then break fi done + + # we need to make sure that errexit shell option (set -e) is not enabled, + # since archdetect will produce non-zero exit code if no accelerator was found + if [[ "$-" =~ e ]]; then + errexit_shell_option_set='yes' + set +e + else + errexit_shell_option_set='no' + fi + + # to be able to grab exit code of archdetect trying to detect accelerators, + # we can not run it via $(...), so we have to redirect the output to a temporary file + tmpout=$(mktemp) + ${EESSI_INIT_DIR_PATH}/eessi_archdetect.sh accelpath 2>&1 > $tmpout + accelpath_exit_code=$? + + if [[ "$errexit_shell_option_set" == "yes" ]]; then + set -e + fi + + if [[ $accelpath_exit_code -eq 0 ]]; then + export EESSI_ACCEL_SUBDIR=$(tail -1 $tmpout && rm -f $tmpout) + if [ -z ${EESSI_ACCEL_SUBDIR} ]; then + error "accelerator detection with archdetect worked, but no result was returned?!" + else + # allow specifying different parent directory for accel/* subdirectory via $EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE + EESSI_ACCEL_SOFTWARE_SUBDIR=${EESSI_ACCEL_SOFTWARE_SUBDIR_OVERRIDE:-$EESSI_SOFTWARE_SUBDIR} + # path to where accel/* subdirectory is located + EESSI_ACCEL_SOFTWARE_PATH=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_ACCEL_SOFTWARE_SUBDIR} + if [ -d $EESSI_ACCEL_SOFTWARE_PATH/${EESSI_ACCEL_SUBDIR} ]; then + show_msg "archdetect found supported accelerator for CPU target ${EESSI_ACCEL_SOFTWARE_SUBDIR}: ${EESSI_ACCEL_SUBDIR}" + else + show_msg "No matching path found in ${EESSI_ACCEL_SOFTWARE_SUBDIR} for accelerator detected by archdetect (${EESSI_ACCEL_SUBDIR})" + fi + fi + else + show_msg "archdetect could not detect any accelerators" + rm -f $tmpout + fi elif [ "$EESSI_USE_ARCHSPEC" == "1" ]; then # note: eessi_software_subdir_for_host.py will pick up value from $EESSI_SOFTWARE_SUBDIR_OVERRIDE if it's defined! export EESSI_EPREFIX_PYTHON=$EESSI_EPREFIX/usr/bin/python3 @@ -81,15 +120,17 @@ if [ -d $EESSI_PREFIX ]; then if [ ! -z $EESSI_BASIC_ENV ]; then show_msg "Only setting up basic environment, so we're done" elif [ -d $EESSI_SOFTWARE_PATH ]; then + export EESSI_SITE_SOFTWARE_PATH=${EESSI_SOFTWARE_PATH/versions/host_injections} + show_msg "Using ${EESSI_SITE_SOFTWARE_PATH} as the site extension directory for installations." + # Allow for use of alternative module tree shipped with EESSI + if [ -z ${EESSI_MODULE_SUBDIR+x} ]; then + # EESSI_MODULE_SUBDIR not set + EESSI_MODULE_SUBDIR="modules/all" + fi # Allow for the use of a custom MNS if [ -z ${EESSI_CUSTOM_MODULEPATH+x} ]; then # EESSI_CUSTOM_MODULEPATH not set so we use our defaults - # Allow for use of alternative module tree shipped with EESSI - if [ -z ${EESSI_MODULE_SUBDIR+x} ]; then - # EESSI_MODULE_SUBDIR not set - EESSI_MODULE_SUBDIR="modules/all" - fi EESSI_MODULEPATH=$EESSI_SOFTWARE_PATH/$EESSI_MODULE_SUBDIR else show_msg "Using defined environment variable \$EESSI_CUSTOM_MODULEPATH to set EESSI_MODULEPATH." @@ -99,13 +140,18 @@ if [ -d $EESSI_PREFIX ]; then if [ -d $EESSI_MODULEPATH ]; then export EESSI_MODULEPATH=$EESSI_MODULEPATH show_msg "Using ${EESSI_MODULEPATH} as the directory to be added to MODULEPATH." - export EESSI_SITE_MODULEPATH=${EESSI_MODULEPATH/versions/host_injections} + export EESSI_SITE_MODULEPATH=$EESSI_SITE_SOFTWARE_PATH/$EESSI_MODULE_SUBDIR show_msg "Using ${EESSI_SITE_MODULEPATH} as the site extension directory to be added to MODULEPATH." else error "EESSI module path at $EESSI_MODULEPATH not found!" false fi + if [ -d ${EESSI_ACCEL_SOFTWARE_PATH}/${EESSI_ACCEL_SUBDIR}/${EESSI_MODULE_SUBDIR} ]; then + export EESSI_MODULEPATH_ACCEL=${EESSI_ACCEL_SOFTWARE_PATH}/${EESSI_ACCEL_SUBDIR}/${EESSI_MODULE_SUBDIR} + show_msg "Using ${EESSI_MODULEPATH_ACCEL} as additional directory (for accelerators) to be added to MODULEPATH." + fi + # Fix wrong path for RHEL >=8 libcurl # This is required here because we ship curl in our compat layer. If we only provided # curl as a module file we could instead do this via a `modluafooter` in an EasyBuild diff --git a/init/modules/EESSI/2023.06.lua b/init/modules/EESSI/2023.06.lua index 32aaf6c07f..463706ce6c 100644 --- a/init/modules/EESSI/2023.06.lua +++ b/init/modules/EESSI/2023.06.lua @@ -48,9 +48,11 @@ local eessi_software_subdir = archdetect local eessi_eprefix = pathJoin(eessi_prefix, "compat", eessi_os_type, eessi_cpu_family) local eessi_software_path = pathJoin(eessi_prefix, "software", eessi_os_type, eessi_software_subdir) local eessi_module_path = pathJoin(eessi_software_path, "modules", "all") -local eessi_site_module_path = string.gsub(eessi_module_path, "versions", "host_injections") +local eessi_site_software_path = string.gsub(eessi_software_path, "versions", "host_injections") +local eessi_site_module_path = pathJoin(eessi_site_software_path, "modules", "all") setenv("EPREFIX", eessi_eprefix) setenv("EESSI_CPU_FAMILY", eessi_cpu_family) +setenv("EESSI_SITE_SOFTWARE_PATH", eessi_site_software_path) setenv("EESSI_SITE_MODULEPATH", eessi_site_module_path) setenv("EESSI_SOFTWARE_SUBDIR", eessi_software_subdir) setenv("EESSI_PREFIX", eessi_prefix) diff --git a/test_suite.sh b/test_suite.sh index e7151e00e7..1f0b91c477 100755 --- a/test_suite.sh +++ b/test_suite.sh @@ -198,10 +198,19 @@ fi # Get the subset of test names based on the test mapping and tags (e.g. CI, 1_node) module_list="module_files.list.txt" mapping_config="tests/eessi_test_mapping/software_to_tests.yml" -# Run with --debug for easier debugging in case there are issues: -python3 tests/eessi_test_mapping/map_software_to_test.py --module-list "${module_list}" --mapping-file "${mapping_config}" --debug -REFRAME_NAME_ARGS=$(python3 tests/eessi_test_mapping/map_software_to_test.py --module-list "${module_list}" --mapping-file "${mapping_config}") -test_selection_exit_code=$? +if [[ ! -f "$module_list" ]]; then + echo_green "File ${module_list} not found, so only running the default set of tests from ${mapping_config}" + # Run with --debug for easier debugging in case there are issues: + python3 tests/eessi_test_mapping/map_software_to_test.py --mapping-file "${mapping_config}" --debug --defaults-only + REFRAME_NAME_ARGS=$(python3 tests/eessi_test_mapping/map_software_to_test.py --mapping-file "${mapping_config}" --defaults-only) + test_selection_exit_code=$? +else + # Run with --debug for easier debugging in case there are issues: + python3 tests/eessi_test_mapping/map_software_to_test.py --module-list "${module_list}" --mapping-file "${mapping_config}" --debug + REFRAME_NAME_ARGS=$(python3 tests/eessi_test_mapping/map_software_to_test.py --module-list "${module_list}" --mapping-file "${mapping_config}") + test_selection_exit_code=$? +fi +# Check exit status if [[ ${test_selection_exit_code} -eq 0 ]]; then echo_green "Succesfully extracted names of tests to run: ${REFRAME_NAME_ARGS}" else diff --git a/tests/archdetect/nvidia-smi/1xa100.output b/tests/archdetect/nvidia-smi/1xa100.output new file mode 100644 index 0000000000..5eb3aaff18 --- /dev/null +++ b/tests/archdetect/nvidia-smi/1xa100.output @@ -0,0 +1 @@ +accel/nvidia/cc80 diff --git a/tests/archdetect/nvidia-smi/1xa100.sh b/tests/archdetect/nvidia-smi/1xa100.sh new file mode 100755 index 0000000000..ead191418b --- /dev/null +++ b/tests/archdetect/nvidia-smi/1xa100.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# output from NVIDIA A100 system, +# produced by: nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader +echo "NVIDIA A100-SXM4-80GB, 1, 545.23.08, 8.0" +exit 0 diff --git a/tests/archdetect/nvidia-smi/2xa100.output b/tests/archdetect/nvidia-smi/2xa100.output new file mode 100644 index 0000000000..5eb3aaff18 --- /dev/null +++ b/tests/archdetect/nvidia-smi/2xa100.output @@ -0,0 +1 @@ +accel/nvidia/cc80 diff --git a/tests/archdetect/nvidia-smi/2xa100.sh b/tests/archdetect/nvidia-smi/2xa100.sh new file mode 100755 index 0000000000..5539607fbe --- /dev/null +++ b/tests/archdetect/nvidia-smi/2xa100.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# output from NVIDIA A100 system, +# produced by: nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader +echo "NVIDIA A100-SXM4-80GB, 2, 545.23.08, 8.0" +echo "NVIDIA A100-SXM4-80GB, 2, 545.23.08, 8.0" +exit 0 diff --git a/tests/archdetect/nvidia-smi/4xa100.output b/tests/archdetect/nvidia-smi/4xa100.output new file mode 100644 index 0000000000..5eb3aaff18 --- /dev/null +++ b/tests/archdetect/nvidia-smi/4xa100.output @@ -0,0 +1 @@ +accel/nvidia/cc80 diff --git a/tests/archdetect/nvidia-smi/4xa100.sh b/tests/archdetect/nvidia-smi/4xa100.sh new file mode 100755 index 0000000000..45458ea7bd --- /dev/null +++ b/tests/archdetect/nvidia-smi/4xa100.sh @@ -0,0 +1,8 @@ +#!/bin/bash +# output from NVIDIA A100 system, +# produced by: nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader +echo "NVIDIA A100-SXM4-80GB, 4, 545.23.08, 8.0" +echo "NVIDIA A100-SXM4-80GB, 4, 545.23.08, 8.0" +echo "NVIDIA A100-SXM4-80GB, 4, 545.23.08, 8.0" +echo "NVIDIA A100-SXM4-80GB, 4, 545.23.08, 8.0" +exit 0 diff --git a/tests/archdetect/nvidia-smi/cc01.output b/tests/archdetect/nvidia-smi/cc01.output new file mode 100644 index 0000000000..9cbf66a131 --- /dev/null +++ b/tests/archdetect/nvidia-smi/cc01.output @@ -0,0 +1 @@ +accel/nvidia/cc01 diff --git a/tests/archdetect/nvidia-smi/cc01.sh b/tests/archdetect/nvidia-smi/cc01.sh new file mode 100755 index 0000000000..81011a1d16 --- /dev/null +++ b/tests/archdetect/nvidia-smi/cc01.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# output from non-existing NVIDIA GPU system, +# to test handling of unknown GPU model +# (supposedly) produced by: nvidia-smi --query-gpu=gpu_name,count,driver_version,compute_cap --format=csv,noheader +echo "NVIDIA does-not-exist, 1, 000.00.00, 0.1" +exit 0 diff --git a/tests/archdetect/nvidia-smi/no_devices.output b/tests/archdetect/nvidia-smi/no_devices.output new file mode 100644 index 0000000000..b251bfc837 --- /dev/null +++ b/tests/archdetect/nvidia-smi/no_devices.output @@ -0,0 +1 @@ +non-zero exit code: 3 diff --git a/tests/archdetect/nvidia-smi/no_devices.sh b/tests/archdetect/nvidia-smi/no_devices.sh new file mode 100755 index 0000000000..0bc26dcddc --- /dev/null +++ b/tests/archdetect/nvidia-smi/no_devices.sh @@ -0,0 +1,3 @@ +#!/bin/bash +echo "No devices were found" +exit 6 diff --git a/tests/archdetect/nvidia-smi/none.output b/tests/archdetect/nvidia-smi/none.output new file mode 100644 index 0000000000..e287574cc3 --- /dev/null +++ b/tests/archdetect/nvidia-smi/none.output @@ -0,0 +1 @@ +non-zero exit code: 2 diff --git a/tests/eessi_test_mapping/map_software_to_test.py b/tests/eessi_test_mapping/map_software_to_test.py index 24cf246ef1..a0da6258c8 100644 --- a/tests/eessi_test_mapping/map_software_to_test.py +++ b/tests/eessi_test_mapping/map_software_to_test.py @@ -33,29 +33,32 @@ def get_tests_for_software(software_name, mappings): return [] -def main(yaml_file, module_file, debug): +def main(yaml_file, module_file, debug, defaults_only): """Main function to process software names and their tests.""" mappings = load_mappings(yaml_file) if debug: print(f"Loaded mappings from '{yaml_file}'") - software_names = read_software_names(module_file) - if debug: - print(f"Read software names from '{module_file}'") + if not defaults_only: + software_names = read_software_names(module_file) + if debug: + print(f"Read software names from '{module_file}'") tests_to_run = [] arg_string = "" - # For each module name, get the relevant set of tests - for software_name in software_names: - additional_tests = get_tests_for_software(software_name, mappings) - for test in additional_tests: - if test not in tests_to_run: - tests_to_run.append(test) - - if additional_tests and debug: - print(f"Software: {software_name} -> Tests: {additional_tests}") - elif debug: - print(f"Software: {software_name} -> No tests found") + + if not defaults_only: + # For each module name, get the relevant set of tests + for software_name in software_names: + additional_tests = get_tests_for_software(software_name, mappings) + for test in additional_tests: + if test not in tests_to_run: + tests_to_run.append(test) + + if additional_tests and debug: + print(f"Software: {software_name} -> Tests: {additional_tests}") + elif debug: + print(f"Software: {software_name} -> No tests found") # Always add the default set of tests, if default_tests is specified if 'default_tests' in mappings: @@ -83,8 +86,10 @@ def main(yaml_file, module_file, debug): parser = argparse.ArgumentParser(description="Map software names to their tests based on a YAML configuration.") parser.add_argument('--mapping-file', type=str, help='Path to the YAML file containing the test mappings.') parser.add_argument('--module-list', type=str, help='Path to the file containing the list of software names.') - parser.add_argument('--debug', action='store_true', help='Enable debug output.') + defaults_help = "Don't consider the module-list file, only return the default tests from the mapping file" + parser.add_argument('--defaults-only', action='store_true', default=False, help=defaults_help) + parser.add_argument('--debug', action='store_true', default=False, help='Enable debug output.') args = parser.parse_args() - main(args.mapping_file, args.module_list, args.debug) + main(args.mapping_file, args.module_list, args.debug, args.defaults_only)