Feature/compare califiles (#318)
Add scripts for comparing benchmark and example timings against release timings, plus a notebook that uses Thicket to visualize the data.

---------

Co-authored-by: David J. Gardner <[email protected]>
Co-authored-by: Cody J. Balos <[email protected]>
3 people committed Dec 18, 2023
1 parent aef26f3 commit dc3ef2f
Showing 7 changed files with 334 additions and 7 deletions.
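
At its core, each new script loads Caliper files with Thicket, averages the "Max time/rank" metric, and compares the current run's mean against the matching release baseline. A minimal sketch of that comparison, reusing the calls the scripts themselves make, with a hypothetical cluster name and release version under the default Caliper directory (the benchmark directory name is the one used in the notebook below):

import glob

import thicket as tt

cali_dir = "/usr/workspace/sundials/califiles"            # default --calidir in the scripts
release_dir = f"{cali_dir}/Release/lassen/v7.0.0"         # hypothetical cluster and version

# Current run and release baseline for one benchmark
bench = "Benchmarking/diffusion_2D/arkode_diffusion_2D_mpi_d2d_arkode_serial"
current = tt.Thicket.from_caliperreader(glob.glob(f"{cali_dir}/{bench}/*.cali"))
baseline = tt.Thicket.from_caliperreader(glob.glob(f"{release_dir}/{bench}/*.cali"))

# Aggregate the metric the scripts compare
metrics = ["Max time/rank"]
tt.mean(current, columns=metrics)
tt.mean(baseline, columns=metrics)

# ratio > 1 means the current run is slower than the release baseline
ratio = (current.statsframe.dataframe["Max time/rank_mean"]
         / baseline.statsframe.dataframe["Max time/rank_mean"])
print(ratio[0])
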
22 changes: 17 additions & 5 deletions .gitlab/build_and_test.sh
@@ -188,11 +188,23 @@ then
$cmake_exe --version

# configure
$cmake_exe \
-C "${hostconfig_path}" \
-DCMAKE_INSTALL_PREFIX=${install_dir} \
"${project_dir}"

if [[ "${CI_COMMIT_BRANCH}" == "main" ]]
then
# redirect caliper files to release directory
sundials_version=$(cd ${project_dir}; git describe --abbrev=0)
$cmake_exe \
-C "${hostconfig_path}" \
-DCMAKE_INSTALL_PREFIX=${install_dir} \
-DSUNDIALS_CALIPER_OUTPUT_DIR="${CALIPER_DIR}/Release/${hostname}/${sundials_version}" \
"${project_dir}"

else
$cmake_exe \
-C "${hostconfig_path}" \
-DCMAKE_INSTALL_PREFIX=${install_dir} \
"${project_dir}"
fi

# build
VERBOSE_BUILD=${VERBOSE_BUILD:-"OFF"}
if [[ "${VERBOSE_BUILD}" == "ON" ]]; then
3 changes: 2 additions & 1 deletion .gitlab/lassen-jobs.yml
@@ -38,7 +38,8 @@ lassen_gcc_cuda_bench:
matrix:
- COMPILER_SPEC: [email protected]
CUDA_SPEC: [[email protected]]
CALIPER_DIR: /usr/workspace/sundials/califiles
variables:
SPEC: "%${COMPILER_SPEC} cstd=99 cxxstd=14 build_type=Release precision=double scheduler=lsf caliper-dir=/usr/workspace/sundials/califiles ~int64 +benchmarks+profiling+caliper+adiak+mpi+openmp+cuda+raja cuda_arch=70 ^raja+cuda~openmp~examples~exercises cuda_arch=70 ^caliper+adiak+cuda cuda_arch=70 ^${CUDA_SPEC}+allow-unsupported-compilers"
SPEC: "%${COMPILER_SPEC} cstd=99 cxxstd=14 build_type=Release precision=double scheduler=lsf caliper-dir=${CALIPER_DIR} ~int64 +benchmarks+profiling+caliper+adiak+mpi+openmp+cuda+raja cuda_arch=70 ^raja+cuda~openmp~examples~exercises cuda_arch=70 ^caliper+adiak+cuda cuda_arch=70 ^${CUDA_SPEC}+allow-unsupported-compilers"
extends: .lassen_build_and_bench

3 changes: 2 additions & 1 deletion src/sundials/sundials_context.c
@@ -239,7 +239,8 @@ void sunAdiakCollectMetadata() {
adiak_namevalue("fortran_compiler_version", 2, NULL, "%s", SUN_FORTRAN_COMPILER_VERSION);
adiak_namevalue("fortran_compiler_flags", 2, NULL, "%s", SUN_FORTRAN_COMPILER_FLAGS);

adiak_namevalue("sundials_version", 2, NULL, "%s", SUNDIALS_GIT_VERSION);
adiak_namevalue("sundials_version", 2, NULL, "%s", SUNDIALS_VERSION);
adiak_namevalue("sundials_git_version", 2, NULL, "%s", SUNDIALS_GIT_VERSION);
adiak_namevalue("build_type", 2, NULL, "%s", SUN_BUILD_TYPE);
adiak_namevalue("third_party_libraries", 2, NULL, "%s", SUN_TPL_LIST);
#ifdef SUN_JOB_ID
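
Splitting the release version out of the git description matters downstream: the comparison scripts read it back through Thicket metadata to locate the matching release baseline directory. A minimal sketch of that lookup (the Caliper file path is hypothetical):

import thicket as tt

th = tt.Thicket.from_caliperreader("run.cali")            # hypothetical Caliper file
# Adiak metadata recorded by sunAdiakCollectMetadata() surfaces as Thicket metadata columns
version = th.metadata["sundials_version"].values[0]       # e.g. "7.0.0"
cluster = th.metadata["cluster"].values[0]
release_dir = f"/usr/workspace/sundials/califiles/Release/{cluster}/{version}"
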
87 changes: 87 additions & 0 deletions test/benchmark_analysis.ipynb
@@ -0,0 +1,87 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import subprocess\n",
"import sys\n",
"import re\n",
"import glob\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"\n",
"from IPython.display import display, HTML\n",
"display(HTML(\"<style>.container { width:100% !important; }</style>\"))\n",
"\n",
"import hatchet as ht\n",
"import thicket as tt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Set desired pandas options\n",
"pd.set_option('display.max_rows', None)\n",
"pd.set_option('display.max_columns', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"caliDir = \"/usr/workspace/pan13/shrimp/Benchmarking/diffusion_2D/arkode_diffusion_2D_mpi_d2d_arkode_serial\"\n",
"caliFiles = glob.glob(\"%s/*.cali\" % caliDir)\n",
"\n",
"th_bench = tt.Thicket.from_caliperreader(caliFiles)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_merge = th_bench.dataframe.merge(th_bench.metadata, on='profile')\n",
"df_merge['launchdate'] = pd.to_datetime(df_merge['launchdate'], unit='s')\n",
"\n",
"# display only the top-most node\n",
"df_main = df_merge[df_merge['name'] == 'main']\n",
"df_main.set_index('launchdate')\n",
"\n",
"# adjust y-axis to be 2 degrees of precision\n",
"df_main.plot(x='launchdate', y=['Max time/rank', 'Avg time/rank', 'Min time/rank'], kind='line', use_index=True, xticks=df_main['launchdate'], rot=80, x_compat=True, figsize=(18,1))\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
109 changes: 109 additions & 0 deletions test/compare_benchmarks.py
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
# -----------------------------------------------------------------------------
# Programmer(s): Yu Pan @ LLNL
# -----------------------------------------------------------------------------
# SUNDIALS Copyright Start
# Copyright (c) 2002-2023, Lawrence Livermore National Security
# and Southern Methodist University.
# All rights reserved.
#
# See the top-level LICENSE and NOTICE files for details.
#
# SPDX-License-Identifier: BSD-3-Clause
# SUNDIALS Copyright End
# -----------------------------------------------------------------------------

import os
import glob
import argparse
import multiprocessing as mp

import thicket as tt


def main():
parser = argparse.ArgumentParser(description='Compare Sundials performance results against previous results')

parser.add_argument('--release', dest='release', action='store_true', help='indicate if the current run to process is a release')

parser.add_argument('--calidir', dest='caliDir', type=str, help='path to directory containing caliper files', default="/usr/workspace/sundials/califiles")

parser.add_argument('--releasedir', dest='releaseDir', type=str, help='path to directory containing release caliper files', default="/usr/workspace/sundials/califiles/Release")

parser.add_argument('--outpath', dest='outPath', type=str, help='path to directory to write results to', default="/dev/null")

parser.add_argument('--jobid', dest='jobID', type=int, help='job id of the current run to identify .cali files')

parser.add_argument('--threshold', dest="threshold", type=float, help='the percentage threshold in performance difference that indicates a regression', default=2.0)

args = parser.parse_args()

release = args.release
releaseDir = args.releaseDir
caliDir = args.caliDir
outPath = args.outPath
jobID = args.jobID
threshold = args.threshold

# Get available benchmarks
benchFiles = glob.glob("%s/Benchmarking/*/*" % caliDir)

if not os.path.exists(outPath):
os.makedirs(outPath)
outFile = open("%s/benchmark_output.out" % outPath, 'w')

# thread per file
with mp.Pool() as pool:
for res in pool.starmap(process_benchmark, [(jobID, release, releaseDir, i, threshold) for i in benchFiles]):
if res:
outFile.write(res + "\n")
outFile.close()

outFile = open("%s/benchmark_output.out" % outPath, 'r')
try:
outLines = outFile.readlines()
finally:
outFile.close()

if (len(outLines) == 0):
return -1
return 0

def process_benchmark(jobID, isRelease, releaseDir, benchmarkDir, threshold):
# Get the current benchmark run
benchmarkFiles = glob.glob("%s/*.cali" % benchmarkDir)
# Don't compare if the run didn't include this benchmark
if (len(benchmarkFiles) == 0):
return

th_files = tt.Thicket.from_caliperreader(benchmarkFiles)
curFilter = lambda x: x['job_id'] == jobID
th_current = th_files.filter_metadata(curFilter)

# Get the release caliper file
cluster = th_current.metadata['cluster'].values[0]
if isRelease:
# Get the last release
versionDirs = glob.glob("%s/%s/*" % (releaseDir, cluster))
versionDirs.sort(key=os.path.getmtime, reverse=True)
versionDir = versionDirs[1]
else:
# Get the release the run is a part of
version = th_current.metadata['sundials_version'].values[0]
versionDir = "%s/%s/%s" % (releaseDir, cluster, version)
benchmarkName = th_current.metadata['env.TEST_NAME'].values[0]
releaseFile = glob.glob("%s/Benchmarking/*/%s/*.cali" % (versionDir, benchmarkName), recursive=True)
th_compare = tt.Thicket.from_caliperreader(releaseFile)
metrics = ['Max time/rank']
tt.mean(th_current, columns=metrics)
tt.mean(th_compare, columns=metrics)

ratio = th_current.statsframe.dataframe['Max time/rank_mean'] / th_compare.statsframe.dataframe['Max time/rank_mean']

tolerance = threshold/100
if 1 - ratio[0] < tolerance:
return benchmarkName


if __name__ == "__main__":
main()
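
The glob patterns in this script imply a particular layout under the Caliper directory. A sketch of that assumed layout, with placeholder names inferred from the patterns rather than taken from the commit:

# <calidir>/                                          default: /usr/workspace/sundials/califiles
#   Benchmarking/<package>/<benchmark>/*.cali         current benchmark runs (filtered by job_id)
#   Testing/<run_dir>/<test_name>.<timestamp>.cali    current example/test runs
#   Release/<cluster>/<version>/
#     Benchmarking/<package>/<benchmark>/*.cali       release benchmark baselines
#     Testing/<run_dir>/<test_name>.<timestamp>.cali  release example baselines
import glob

cali_dir = "/usr/workspace/sundials/califiles"
benchmark_dirs = glob.glob(f"{cali_dir}/Benchmarking/*/*")  # one entry per benchmark, as in main()
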
115 changes: 115 additions & 0 deletions test/compare_examples.py
@@ -0,0 +1,115 @@
#!/usr/bin/env python3
# -----------------------------------------------------------------------------
# Programmer(s): Yu Pan @ LLNL
# -----------------------------------------------------------------------------
# SUNDIALS Copyright Start
# Copyright (c) 2002-2023, Lawrence Livermore National Security
# and Southern Methodist University.
# All rights reserved.
#
# See the top-level LICENSE and NOTICE files for details.
#
# SPDX-License-Identifier: BSD-3-Clause
# SUNDIALS Copyright End
# -----------------------------------------------------------------------------

import os
import subprocess
import sys
import re
import glob
import argparse
import multiprocessing as mp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import hatchet as ht
import thicket as tt

def main():
parser = argparse.ArgumentParser(description='Compare Sundials performance results against previous results')

parser.add_argument('--release', dest='release', action='store_true', help='indicate if the current run to process is a release')

parser.add_argument('--calidir', dest='caliDir', type=str, help='path to directory containing caliper files', default="/usr/workspace/sundials/califiles")

parser.add_argument('--releasedir', dest='releaseDir', type=str, help='path to directory containing release caliper files', default="/usr/workspace/sundials/califiles/Release")

parser.add_argument('--outpath', dest='outPath', type=str, help='path to directory to write results to', default="/dev/null")

parser.add_argument('--threshold', dest="threshold", type=float, help='the percentage threshold in performance difference that indicates a regression', default=2.0)

args = parser.parse_args()

release = args.release
releaseDir = args.releaseDir
caliDir = args.caliDir
outPath = args.outPath
threshold = args.threshold

# Get the latest test run
runDirs = glob.glob("%s/Testing/*" % caliDir, recursive = True)
runDirs.sort(key=os.path.getmtime, reverse=True)
runDir = runDirs[0]

runFile = glob.glob(runDir)[0]
th_temp = tt.Thicket.from_caliperreader(runFile)
cluster = th_temp.metadata['cluster']
# get machine from the file
if release:
# Compare against the last release
versionDirs = glob.glob("%s/%s/*" % (releaseDir, cluster))
versionDirs.sort(key=os.path.getmtime, reverse=True)
versionDir = versionDirs[1]
else:
# Compare against the release the run is a part of
version = th_temp.metadata['sundials_version'].values[0]
versionDir = "%s/%s/%s" % (releaseDir, cluster, version)

# Gather files to process
runFiles = glob.glob("%s/*.cali" % (runDir))

if not os.path.exists(outPath):
os.makedirs(outPath)
outFile = open("%s/example_output.out" % outPath, 'w')

# Compare test results against past runs. If a test performs below a threshold, output test name to outFile.
with mp.Pool() as pool:
for res in pool.starmap(compare_against_release, [(versionDir, i, threshold) for i in runFiles]):
if res:
outFile.write(res + "\n")
outFile.close()

outFile = open("%s/example_output.out" % outPath, 'r')
try:
outLines = outFile.readlines()
finally:
outFile.close()

if (len(outLines) == 0):
return -1
return 0


def compare_against_release(releaseDir, file, threshold):
th = tt.Thicket.from_caliperreader(file)

testName = th.metadata['env.TEST_NAME'].values[0]

# Gather release run
releaseFile = glob.glob("%s/Testing/*/%s.*.cali" % (releaseDir, testName), recursive=True)
th_release = tt.Thicket.from_caliperreader(releaseFile)

metrics = ['Max time/rank']
tt.mean(th_release, columns=metrics)
tt.mean(th, columns=metrics)

ratio = th.statsframe.dataframe['Max time/rank_mean'] / th_release.statsframe.dataframe['Max time/rank_mean']
print(ratio[0])
tolerance = threshold/100
if 1 - ratio[0] < tolerance:
return testName

if __name__ == "__main__":
main()
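
A hypothetical CI step (not part of this commit) could drive both scripts and fail the job when either writes a regressed name to its output file; the flags below are the ones the scripts define, while the paths and job id are made up:

import pathlib
import subprocess
import sys

out_dir = pathlib.Path("/tmp/perf-compare")          # hypothetical output directory
cali_dir = "/usr/workspace/sundials/califiles"       # default used by both scripts

subprocess.run([sys.executable, "test/compare_benchmarks.py",
                "--calidir", cali_dir, "--jobid", "123456",   # hypothetical job id
                "--outpath", str(out_dir)], check=True)
subprocess.run([sys.executable, "test/compare_examples.py",
                "--calidir", cali_dir, "--outpath", str(out_dir)], check=True)

regressions = []
for name in ("benchmark_output.out", "example_output.out"):
    path = out_dir / name
    if path.exists():
        regressions += [line.strip() for line in path.read_text().splitlines() if line.strip()]
if regressions:
    raise SystemExit("Possible performance regressions: " + ", ".join(regressions))
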
2 changes: 2 additions & 0 deletions test/testRunner
@@ -201,6 +201,8 @@ def main():
dateTime = datetime.datetime.now().strftime("%m%d%Y_%H%M%S")
profilePath = os.path.join(caliDir, args.testName+".%s.cali" % dateTime)
os.environ['CALI_SERVICES_ENABLE'] = 'env'
os.environ['TEST_NAME'] = testName
os.environ['CALI_ENV_EXTRA'] = 'TEST_NAME'
os.environ['CALI_CONFIG'] = 'spot(output=%s)' % profilePath

# if user supplies precision info override the default choices
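
The two added environment variables extend Caliper's env service so each run records its test name, which later surfaces as env.TEST_NAME in the comparison scripts. A standalone sketch of the same setup, with a hypothetical test executable:

import os
import subprocess

# Mirror what testRunner exports before launching a test (test name and binary are hypothetical)
os.environ["CALI_SERVICES_ENABLE"] = "env"
os.environ["TEST_NAME"] = "ark_analytic"
os.environ["CALI_ENV_EXTRA"] = "TEST_NAME"             # tell the env service to record TEST_NAME
os.environ["CALI_CONFIG"] = "spot(output=ark_analytic.cali)"

subprocess.run(["./ark_analytic"], check=True)
# The resulting ark_analytic.cali now carries env.TEST_NAME in its metadata.
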
