From c1989fabee047173e44eaea60e0a73f85c309eac Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Fri, 14 Jun 2024 08:39:09 -0500
Subject: [PATCH 1/7] Use 'communicate' method for subprocess in LLNL test
 script

This patch uses the subprocess 'communicate' method in the LLNL Python
test script, as recommended in the documentation, and adds some
return-code checks.  This was mostly done as a side-effect of
investigating another problem, but is probably helpful anyway.
---
 test/LLNL/openmp5.0-tests/test.py | 31 +++++++++++++++++++++----------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/test/LLNL/openmp5.0-tests/test.py b/test/LLNL/openmp5.0-tests/test.py
index 784733cf7..9c3096b70 100755
--- a/test/LLNL/openmp5.0-tests/test.py
+++ b/test/LLNL/openmp5.0-tests/test.py
@@ -35,16 +35,20 @@ def run(tests):
         cmd="./"+t
         cmd_s=cmd.split()
         p4=subprocess.Popen(cmd_s,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-        for lines in p4.stdout.readlines():
+        stdout,stderr = p4.communicate()
+        stdout = stdout.decode('utf-8')
+        for lines in stdout.splitlines():
             #print(lines)
-            if b"error" in lines:
+            if "error" in lines:
                 print("Failed")
                 passs=False
                 break
-            if b"FAIL" in lines:
+            if "FAIL" in lines:
                 print("Failed")
                 passs=False
                 break
+        if p4.returncode != 0:
+            passs=False
         if passs: 
             print ("Passed")
             pass_count+=1
@@ -61,14 +65,18 @@ def compile(CC,LIBS, tests):
                 cmd=CC+" -c "+ v
                 cmd_s=cmd.split()
                 p2=subprocess.Popen(cmd_s,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-                for lines in p2.stdout.readlines():
-                    efile.write(lines.decode('utf8'))
-                    if b"error" in lines:
+                stdout,stderr = p2.communicate()
+                stdout = stdout.decode('utf-8')
+                for lines in stdout.splitlines():
+                    efile.write(lines)
+                    if "error" in lines:
                         fail=True
                         print("Compilation of ",v," failed")
                         passs = passs and not fail
                         break
                     passs = passs and not fail
+                if p2.returncode != 0:
+                    passs=False
  
             if passs:
                 print("Compiling ",key)
@@ -79,12 +87,15 @@ def compile(CC,LIBS, tests):
                 #print("Final compile command is ",cmd)
                 cmd_s=cmd.split()
                 p3=subprocess.Popen(cmd_s,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
-                for lines in p3.stdout.readlines():
-                    print(lines.decode('utf8'))
-                    if b"error" in lines:
+                stdout, stderr = p3.communicate()
+                stdout = stdout.decode('utf-8')
+                for lines in stdout.splitlines():
+                    print(lines)
+                    if "error" in lines:
                         print("Linking of ",v," failed\n")
                         break
-                runnables.append(key)
+                if p3.returncode == 0:
+                    runnables.append(key)
     return runnables
 def main():
     tests=get_tests("test_list")

From a266b6cbaeeca0e1ab0e3487bce8b4a4d9aa8309 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Fri, 14 Jun 2024 08:36:37 -0500
Subject: [PATCH 2/7] Allow adding extra compile options in LLNL tests

This patch provides a somewhat ad-hoc way of adding compilation options
to particular tests in the LLNL/openmp5.0-tests suite. In the test_list
file, just write options after a forward slash:

  foo.cpp / --extra-opt
---
 test/LLNL/openmp5.0-tests/test.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/test/LLNL/openmp5.0-tests/test.py b/test/LLNL/openmp5.0-tests/test.py
index 9c3096b70..3c25f0118 100755
--- a/test/LLNL/openmp5.0-tests/test.py
+++ b/test/LLNL/openmp5.0-tests/test.py
@@ -19,13 +19,19 @@
 
 def get_tests(file_name):
     d=defaultdict(list)
+    o=defaultdict(lambda: '')
     with open(file_name,"r") as infile:
         for line in infile.readlines():
             #print(line)
-            sline=line.split()
+            if '/' in line:
+                files,opts = line.split('/')
+                sline=files.split()
+                o[sline[0][:-4]]=opts
+            else:
+                sline=line.split()
             for s in sline:
                 d[sline[0][:-4]].append(s)
-    return d
+    return d,o
 
 def run(tests):
     pass_count=0
@@ -54,7 +60,7 @@ def run(tests):
             pass_count+=1
     return pass_count
 
-def compile(CC,LIBS, tests):
+def compile(CC, LIBS, tests, opts):
 
     runnables=[]
     for key,value in tests.items():
@@ -62,7 +68,8 @@ def compile(CC,LIBS, tests):
             passs=True
             for v in value:
                 fail=False
-                cmd=CC+" -c "+ v
+                extraopts=opts[key]
+                cmd=CC+" -c " + v + " " + extraopts
                 cmd_s=cmd.split()
                 p2=subprocess.Popen(cmd_s,stdout=subprocess.PIPE,stderr=subprocess.STDOUT)
                 stdout,stderr = p2.communicate()
@@ -98,12 +105,12 @@ def compile(CC,LIBS, tests):
                     runnables.append(key)
     return runnables
 def main():
-    tests=get_tests("test_list")
+    tests,opts=get_tests("test_list")
 # Change Compile line in CC and LIBS
     CC="{}/bin/clang++  -O2  -target x86_64-pc-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march={}".format(AOMP, AOMP_GPU)
     LIBS = ""
 # End Compile line 
-    runnables=compile(CC,LIBS, tests)
+    runnables=compile(CC, LIBS, tests, opts)
     print("\nRunnable tests are:")
     for r in runnables:
         print(r)

From 1bf2b3d9919258e39657fad99fd78e4c9b57e394 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Mon, 17 Jun 2024 11:10:29 -0500
Subject: [PATCH 3/7] Add timeout detection, and also increase timeout for LLNL
 tests

This patch fixes a mysterious bug with the LLNL test script: if it takes
more than two minutes to run, it silently exits without producing any
output (with logging enabled).  This can happen for debug builds of the
compiler, even on a fast machine.

This happens because the underlying Python script buffers output because
it knows it is writing to a pipe.  But if the 'timeout' command kills
that script before it has completed, all its output is then discarded.

The fix is twofold: firstly the "timed-out" condition is now detected
in the check_LLNL.sh script, and secondly the timeout time is increased.
---
 test/LLNL/openmp5.0-tests/check_LLNL.sh | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/test/LLNL/openmp5.0-tests/check_LLNL.sh b/test/LLNL/openmp5.0-tests/check_LLNL.sh
index 9062a0bad..03c34d1cd 100755
--- a/test/LLNL/openmp5.0-tests/check_LLNL.sh
+++ b/test/LLNL/openmp5.0-tests/check_LLNL.sh
@@ -50,7 +50,15 @@ if [ "$1" == "log" ]; then
     log="LLNL.run.log.$date"
   fi
   echo "Log enabled: $log"
- timeout 120 ./test.py $AOMP $AOMP_GPU 2>&1 | tee $log
+ timeout 480 ./test.py $AOMP $AOMP_GPU 2>&1 | tee $log
+ if [ ${PIPESTATUS[0]} -eq 124 ]; then
+   echo "WARNING: Testing timed out!" >&2
+   exit 1
+ fi
 else
- timeout 120 ./test.py $AOMP $AOMP_GPU
+ timeout 480 ./test.py $AOMP $AOMP_GPU
+ if [ $? -eq 124 ]; then
+   echo "WARNING: Testing timed out!" >&2
+   exit 1
+ fi
 fi

From 59600c5fb3c0b6f91da8c0c36943e6da2c453d21 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Wed, 19 Jun 2024 11:58:21 -0500
Subject: [PATCH 4/7] Enable imperfect loop collapse analysis option

This patch adds the -Ropenmp-opt=analysis option to the compilation
of the test_imperfect_loop.cxx LLNL/openmp5.0-tests test, and a new
pseudo-test to check that the right 'remark' is emitted by the compiler.

This depends on the upstream PR:

  https://github.com/llvm/llvm-project/pull/96087
---
 .../test_imperfect_loop_warning.cxx           | 34 +++++++++++++++++++
 test/LLNL/openmp5.0-tests/test_list           |  3 +-
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 test/LLNL/openmp5.0-tests/test_imperfect_loop_warning.cxx

diff --git a/test/LLNL/openmp5.0-tests/test_imperfect_loop_warning.cxx b/test/LLNL/openmp5.0-tests/test_imperfect_loop_warning.cxx
new file mode 100644
index 000000000..d71e41ec4
--- /dev/null
+++ b/test/LLNL/openmp5.0-tests/test_imperfect_loop_warning.cxx
@@ -0,0 +1,34 @@
+/* Scan error output from the compilation of "test_imperfect_loop.cxx".
+   Every problem is a hammer here.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int main (void) {
+  FILE *f = fopen ("test_imperfect_loop.ERR", "r");
+  size_t bufsize = 1024;
+  char *buf = (char*) malloc (bufsize);
+  bool found = false;
+
+  if (!f) {
+    perror ("fopen");
+    goto fail;
+  }
+  while (!feof (f) && !found) {
+    getline (&buf, &bufsize, f);
+    if (strstr (buf, "remark: Collapsing imperfectly-nested loop may "
+                     "introduce unexpected data dependencies"))
+      found = true;
+  }
+  fclose (f);
+
+  if (found) {
+    printf ("Imperfect loop warning test:: Pass\n");
+    exit (0);
+  }
+
+fail:
+  printf ("Imperfect loop warning test:: FAIL\n");
+  return 1;
+}
diff --git a/test/LLNL/openmp5.0-tests/test_list b/test/LLNL/openmp5.0-tests/test_list
index ba5bab593..3463e91b4 100644
--- a/test/LLNL/openmp5.0-tests/test_list
+++ b/test/LLNL/openmp5.0-tests/test_list
@@ -3,7 +3,8 @@ test_allocate_directive.cxx
 test_concurrent_map.cxx
 test_implicit_declare_target.cxx foo.cxx
 test_declare_variant.cxx
-test_imperfect_loop.cxx
+test_imperfect_loop.cxx / -Rpass-analysis=openmp-opt
+test_imperfect_loop_warning.cxx
 test_loop.cxx
 test_lvalue_map_motion.cxx
 test_non_rectangular.cxx

From 754539b575943194d2f215e43466a1b86fafdc89 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Mon, 17 Jun 2024 11:12:13 -0500
Subject: [PATCH 5/7] XFAIL support for LLNL/openmp5.0-tests tests

The test test_imperfect_loop.cxx contains a loop collapse operation that
cannot be executed safely in parallel, so should be marked as expected
to fail, somehow.  This patch adds a basic way to do that.
---
 bin/rocm-test/scripts/parse_LLNL.sh  | 7 +++++++
 test/LLNL/openmp5.0-tests/xfail-list | 1 +
 2 files changed, 8 insertions(+)
 create mode 100644 test/LLNL/openmp5.0-tests/xfail-list

diff --git a/bin/rocm-test/scripts/parse_LLNL.sh b/bin/rocm-test/scripts/parse_LLNL.sh
index 0692b04a3..90dad81e0 100755
--- a/bin/rocm-test/scripts/parse_LLNL.sh
+++ b/bin/rocm-test/scripts/parse_LLNL.sh
@@ -26,6 +26,9 @@ fi
 if [ -e failing-tests.txt ]; then
   rm failing-tests.txt
 fi
+if [ -e xfail-tests.txt ]; then
+  rm xfail-tests.txt
+fi
 if [ -e make-fail.txt ]; then
   rm make-fail.txt
 fi
@@ -49,6 +52,10 @@ function parse(){
       fi
       [[ "$line" =~ $testname_regex ]]
       llnltest=${BASH_REMATCH[1]}
+      if [ "$outfile" = "failing-tests.txt" ] && \
+         grep -Fq "$llnltest" xfail-list; then
+        outfile="xfail-tests.txt"
+      fi
       echo "$llnltest" >> $outfile
     fi
   done < "$infile"
diff --git a/test/LLNL/openmp5.0-tests/xfail-list b/test/LLNL/openmp5.0-tests/xfail-list
new file mode 100644
index 000000000..e1a5abf68
--- /dev/null
+++ b/test/LLNL/openmp5.0-tests/xfail-list
@@ -0,0 +1 @@
+test_imperfect_loop run

From f48ea60e401c14d6b3a86ad72ba68f7db5b870c4 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Fri, 21 Jun 2024 07:30:00 -0500
Subject: [PATCH 6/7] Robustify parse_LLNL.sh script a bit

This patch adds some error checking to the parse_LLNL.sh script, and
avoids parsing `ls` output when looking for the results file.
---
 bin/rocm-test/scripts/parse_LLNL.sh | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/bin/rocm-test/scripts/parse_LLNL.sh b/bin/rocm-test/scripts/parse_LLNL.sh
index 90dad81e0..e6a1a18e2 100755
--- a/bin/rocm-test/scripts/parse_LLNL.sh
+++ b/bin/rocm-test/scripts/parse_LLNL.sh
@@ -16,8 +16,14 @@ testname_regex='.*(test_\S*)'
 compile_regex='Compilation.*failed'
 runtime_regex='Running.+\.\.\.\s+([A-Z]*[a-z]*)'
 
-cd "$aompdir/test/LLNL/openmp5.0-tests"
-infile=`ls | grep "LLNL.run.log"`
+cd "$aompdir/test/LLNL/openmp5.0-tests" || exit 1
+declare -a infiles
+infiles=( LLNL.run.log* )
+if [ "${#infiles[@]}" -ne 1 ]; then
+  echo "Expected to find a single result file, bailing out" >&2
+  exit 1
+fi
+infile=${infiles[0]}
 
 # Clean up before parsing
 if [ -e passing-tests.txt ]; then

From 47193fc3ee19bc44c290d87992229c0612ce51f6 Mon Sep 17 00:00:00 2001
From: Julian Brown <julian.brown@amd.com>
Date: Mon, 17 Jun 2024 11:19:47 -0500
Subject: [PATCH 7/7] Split imperfect-loop-collapse into usm/non-usm variants

This patch splits the smoke-fails/imperfect-loop-collapse test into two
variants, one of which does and one which doesn't need unified shared
memory (which is orthogonal to the main purpose of the test).  It also
fixes the HSA_XNACK setting for smoke-fails/imperfect-loop-collapse-usm
version, so both versions now work.
---
 .../imperfect-loop-collapse-usm/Makefile      | 25 +++++++++++++
 .../imperfect_loop_collapse_usm.cpp           | 35 +++++++++++++++++++
 .../imperfect-loop-collapse/Makefile          |  5 +--
 .../imperfect_loop_collapse.cpp               |  5 ++-
 4 files changed, 63 insertions(+), 7 deletions(-)
 create mode 100644 test/smoke-fails/imperfect-loop-collapse-usm/Makefile
 create mode 100644 test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp

diff --git a/test/smoke-fails/imperfect-loop-collapse-usm/Makefile b/test/smoke-fails/imperfect-loop-collapse-usm/Makefile
new file mode 100644
index 000000000..7b09b6a44
--- /dev/null
+++ b/test/smoke-fails/imperfect-loop-collapse-usm/Makefile
@@ -0,0 +1,25 @@
+include ../../Makefile.defs
+
+TESTNAME     = imperfect_loop_collapse_usm
+TESTSRC_MAIN = imperfect_loop_collapse_usm.cpp
+TESTSRC_AUX  =
+TESTSRC_ALL  = $(TESTSRC_MAIN) $(TESTSRC_AUX)
+
+CLANG        ?= clang++
+OMP_BIN      = $(AOMP)/bin/$(CLANG)
+CC           = $(OMP_BIN) $(VERBOSE)
+
+HSA_XNACK ?= 1
+SUPPORTED    = $(SUPPORTS_USM)
+
+# Our "run" target gets overridden.  Make sure we run with HSA_XNACK set
+# appropriately.
+RUNENV	     += HSA_XNACK=${HSA_XNACK}
+
+#-ccc-print-phases
+#"-\#\#\#"
+
+include ../Makefile.rules
+
+run:
+	HSA_XNACK=${HSA_XNACK} ./$(TESTNAME)
diff --git a/test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp b/test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp
new file mode 100644
index 000000000..4be83e0a5
--- /dev/null
+++ b/test/smoke-fails/imperfect-loop-collapse-usm/imperfect_loop_collapse_usm.cpp
@@ -0,0 +1,35 @@
+#include<cstdio>
+
+#define N 1024
+
+#pragma omp requires unified_shared_memory
+
+int main() {
+  double *a = new double[N*N];
+
+  #pragma omp parallel for collapse(2)
+  for(int i = 0; i < N; i++)
+    for(int j = 0; j < N; j++)
+      a[i*N+j] = (double) (i*N+j);
+
+  #pragma omp target teams distribute parallel for collapse(2)
+  for(int i = 0; i < N; i++) {
+    double k = i*3.14;
+    for(int j = 0; j < N; j++)
+      a[i*N+j] += k;
+  }
+
+  //check
+  int err = 0;
+  for(int i = 0; i < N; i++) {
+    double k = i*3.14;
+    for(int j = 0; j < N; j++)
+      if (a[i*N+j] != (double) (i*N+j) + k) {
+        err++;
+        printf("Error at (%d,%d): got %lf expected %lf\n", i, j, a[i*N+j], (double) (i*N+j) + k);
+        if (err > 10) return err;
+      }
+  }
+
+  return err;
+}
diff --git a/test/smoke-fails/imperfect-loop-collapse/Makefile b/test/smoke-fails/imperfect-loop-collapse/Makefile
index eb0665a22..7edc5fa32 100644
--- a/test/smoke-fails/imperfect-loop-collapse/Makefile
+++ b/test/smoke-fails/imperfect-loop-collapse/Makefile
@@ -9,13 +9,10 @@ CLANG        ?= clang++
 OMP_BIN      = $(AOMP)/bin/$(CLANG)
 CC           = $(OMP_BIN) $(VERBOSE)
 
-HSA_XNACK ?= 1
-SUPPORTED    = $(SUPPORTS_USM)
-
 #-ccc-print-phases
 #"-\#\#\#"
 
 include ../Makefile.rules
 
 run:
-	HSA_XNACK=${HSA_XNACK} ./$(TESTNAME)
+	./$(TESTNAME)
diff --git a/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp b/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp
index 4be83e0a5..11818bb4b 100644
--- a/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp
+++ b/test/smoke-fails/imperfect-loop-collapse/imperfect_loop_collapse.cpp
@@ -2,8 +2,6 @@
 
 #define N 1024
 
-#pragma omp requires unified_shared_memory
-
 int main() {
   double *a = new double[N*N];
 
@@ -12,7 +10,8 @@ int main() {
     for(int j = 0; j < N; j++)
       a[i*N+j] = (double) (i*N+j);
 
-  #pragma omp target teams distribute parallel for collapse(2)
+  #pragma omp target teams distribute parallel for collapse(2) \
+                     map(tofrom: a[0:N*N])
   for(int i = 0; i < N; i++) {
     double k = i*3.14;
     for(int j = 0; j < N; j++)