[build] Add performance testing of PRs vs current (antlr#4023)

* Add workflow and script for performance testing a PR. * Fix typo in workflow. * Alter workflow and a grammar to test. * Fix prerequisites for this script. * Typo. * Typo. * Fix global/local Trash tool issues. * Fix up Octave install and call. * Typo. * Add Octave statistics. Print out results. * Add in Student t test and output significance. * Remove Bash trace and add printfs for no-change results. * Add bash echo back in to find problem in redirect. Test on .g4 changes, and only once. * Cat out the octave program to verify. * Change to Welch's test, and add in 'practical' difference test of 5%. * Reformat longest grammar to force testing on it. * Updates to test tip vs last PR. * Update test list computation. * Fix typo. * Update concurrency. Cancel this action if (1) the workflow name is the same; (2) the branch is the same; (3) the PR number is the same. Otherwise, don't cancel prior jobs. * Scale the number of times to run the test so that it is the minimum of 40 times or however many fit in 10 minutes. * Fix typo. * Fix for-loop. * Convert float to int as Bash cannot handle floats well. * Turn off echo and remove extraneous echo. * Remove temporary file.
hellozrh · Mar 29, 2024 · c7883a5 · c7883a5
1 parent ecbfb4c
commit c7883a5
Show file tree

Hide file tree

Showing 5 changed files with 459 additions and 88 deletions.
diff --git a/.github/workflows/perf.yml b/.github/workflows/perf.yml
@@ -0,0 +1,88 @@
+name: Perf analysis of what has changed
+
+# Run on pushes to master and on pull requests that target master.
+on:
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+# Runs are grouped by workflow name, ref and PR number; a newer run in the
+# same group cancels the one still in progress.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event.pull_request.number }}
+
+jobs:
+  # Single job: install the toolchain, then time the grammars changed between
+  # the two commits of this event with _scripts/perf-changed.sh.
+  perf:
+    runs-on: ubuntu-latest
+    steps:
+    # Print the runner's architecture, kernel and CPU details.
+    - name: Info
+      shell: bash
+      run: |
+        arch
+        uname -a
+        if [ -f /proc/cpuinfo ]; then cat /proc/cpuinfo; fi
+    # Octave and its statistics package execute the script that
+    # perf-changed.sh generates. Refresh the package index first and install
+    # non-interactively so the step cannot stall on a prompt or a stale index.
+    - name: Install Octave
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y octave octave-statistics
+        octave --version
+    # Full history is required: the script diffs and checks out both commits.
+    - name: Checkout
+      uses: actions/checkout@v4
+      with:
+        fetch-depth: 0
+    # NOTE(review): the pinned ref was garbled to "[email protected]" by e-mail
+    # obfuscation in the page this was captured from; restored to the v4 major
+    # tag - confirm the exact version against the repository.
+    - name: Install Dotnet
+      uses: actions/setup-dotnet@v4
+      with:
+        dotnet-version: '8.0.x'
+    - name: Test Dotnet
+      run: |
+        dotnet --version
+        dotnet --info
+        dotnet --list-runtimes
+        dotnet --list-sdks
+    - name: Install Java
+      uses: actions/setup-java@v4
+      with:
+        java-version: '11'
+        distribution: 'zulu'
+    - name: Test Java
+      run: |
+        java --version
+        javac --version
+    - name: Install Antlr tool
+      run: |
+         pip install antlr4-tools
+    # NOTE(review): this job defines no matrix, so matrix.language is empty and
+    # the two JavaScript steps below never run. Kept as-is; remove them or add
+    # a matrix if JavaScript timing is ever wanted.
+    # NOTE(review): ref garbled to "[email protected]" in the captured page;
+    # restored to the v4 major tag - confirm the exact version.
+    - name: Install JavaScript
+      if: ${{ matrix.language == 'JavaScript' }}
+      uses: actions/setup-node@v4
+      with:
+        node-version: '16.13.0'
+    - name: Test JavaScript
+      if: ${{ matrix.language == 'JavaScript' }}
+      run: |
+        node --version
+    # NOTE(review): matrix.os is empty for the same reason, so the PATH entry
+    # below is never added. The Trash tools are invoked through `dotnet <tool>`
+    # after `dotnet tool restore`, which does not rely on this PATH entry.
+    - name: Update paths
+      shell: pwsh
+      run: |
+        if ("${{ matrix.os }}" -eq "ubuntu-latest") {
+            echo "$HOME/.dotnet/tools" >> $env:GITHUB_PATH
+        }
+    # Restore the repo-local Trash tools.
+    - name: Install Trash
+      shell: bash
+      run: |
+        dotnet tool restore
+    - name: Test Trash install
+      shell: bash
+      run: |
+        dotnet trgen -- --help
+    # Pull requests compare base vs head; pushes compare the commit before the
+    # push vs the commit after it.
+    - name: Test
+      shell: bash
+      run: |
+        if [ "${{github.event_name}}" == "pull_request" ] ; then
+            Before="${{github.event.pull_request.base.sha}}"
+            After="${{github.event.pull_request.head.sha}}"
+        else
+            Before="${{github.event.before}}"
+            After="${{github.event.after}}"
+        fi
+        bash _scripts/perf-changed.sh -b "$Before" -a "$After"
+
diff --git a/_scripts/perf-changed.sh b/_scripts/perf-changed.sh
@@ -0,0 +1,270 @@
+#
+
+# set -x
+set -e
+
+# Check requirements.
+if ! command -v dotnet &> /dev/null
+then
+    echo "'dotnet' could not be found. Install Microsoft NET."
+    exit 1
+fi
+if ! command -v trxml2 &> /dev/null
+then
+    local=1
+fi
+if ! command -v dotnet trxml2 -- --version &> /dev/null
+then
+    echo "'dotnet' could not be found. Install Microsoft NET."
+    exit 1
+fi
+
+cwd=`pwd`
+
+while getopts 'a:b:' opt; do
+    case "$opt" in
+        a)
+            after="${OPTARG}"
+            ;;
+        b)
+            before="${OPTARG}"
+            ;;
+    esac
+done
+
+if [ "$after" == "" ]
+then
+    echo "'after' not set."
+    exit 1
+fi
+if [ "$before" == "" ]
+then
+    echo "'before' not set."
+    exit 1
+fi
+
+#############################
+#############################
+# Get last commit/pr. Note, some of the PR merges don't
+# fit the pattern, but we'll ignore them. Get "prior" commit before all these
+# changes.
+prs=( After Before )
+com=( $after $before )
+echo PRS = ${prs[@]}
+echo COM = ${com[@]}
+echo '#PRS' = ${#prs[@]}
+
+# Clean up.
+for ((i=0; i<${#prs[@]}; i++))
+do
+    rm -rf "$cwd/${prs[$i]}"
+done
+rm -rf `find . -name 'Generated-*'`
+
+# The PR that is more recent is the first in the list.
+# Get grammars changed for current PR. This will focus exactly on what to
+# test. Note, we only consider .g4 changes, no examples, no duplicates.
+tests=()
+changes=`git diff --name-only ${com[0]} ${com[1]} | grep '[.]g4$' | sed 's#\(.*\)[/][^/]*$#\1#' | sort -u | grep -v _scripts | fgrep -v .github | fgrep -v examples | sort -u | tr -d '\r'`
+echo Changed files = $changes
+
+#############################
+echo Computing grammars changed...
+prefix=`pwd`
+for g in ${changes[@]}
+do
+    if [ ! -d "$g" ]; then continue; fi
+    pushd $g > /dev/null 2> /dev/null
+    while true
+    do
+        if [ -f `pwd`/desc.xml ]
+        then
+            break
+        elif [ `pwd` == "$prefix" ]
+        then
+            break
+        fi
+        cd ..
+    done
+    g=`pwd`
+    g=${g##*$prefix}
+    g=${g##/}
+    if [ "$g" == "" ]; then continue; fi
+    if [ -f desc.xml ]
+    then
+        if [ "$local" == "" ]
+        then
+            gtargets=`trxml2 desc.xml | fgrep -e '/desc/targets' | awk -F '=' '{print $2}' | tr ';' '\n' | fgrep -e 'Java' | fgrep -v 'JavaScript'`
+        else
+            gtargets=`dotnet trxml2 -- desc.xml | fgrep -e '/desc/targets' | awk -F '=' '{print $2}' | tr ';' '\n' | fgrep -e 'Java' | fgrep -v 'JavaScript'`
+        fi
+        if [ "$gtargets" == "" ]; then continue; fi
+    fi
+    tests=( ${tests[@]} $g )
+    popd > /dev/null
+done
+echo Grammars to test = ${tests[@]}
+
+#############################
+# For each revision: check it out, generate and build the CSharp target of
+# every grammar under test, then copy the whole grammar directory (including
+# the build) to $cwd/<After|Before>/<grammar path with '/' replaced by '-'>.
+# The work tree is left on the last revision checked out.
+echo Build each grammar changed in PR.
+for ((i=0; i<${#prs[@]}; i++))
+do
+    rm -rf "${cwd:?}/${prs[$i]}"
+    mkdir "$cwd/${prs[$i]}"
+    git checkout "${com[$i]}"
+    for g in ${tests[@]}
+    do
+        echo Grammar $g
+        gg=$(echo "$g" | tr '/' '-')
+        pushd "$g"
+        # Generated output is untracked and survives 'git checkout'; remove
+        # what the previous revision left so it cannot leak into this build.
+        rm -rf Generated-CSharp*
+        if [ "$local" == "" ]
+        then
+            trgen -t CSharp
+        else
+            dotnet trgen -- -t CSharp
+        fi
+        # Use the first generated directory matching the pattern.
+        generated=( Generated-CSharp* )
+        where=${generated[0]}
+        echo $where
+        cd "$where"
+        make
+        popd
+        cp -r "$g" "$cwd/${prs[$i]}/$gg"
+    done
+done
+
+#===========================
+# Timing and statistics. For every grammar under test:
+#   1. pick the input files once (from the 'Before' copy) so both revisions
+#      parse exactly the same inputs;
+#   2. time run.sh repeatedly for each revision, appending the reported
+#      "Total Time" values to $cwd/p<i>-<grammar>.txt;
+#   3. write an Octave script (xx.m) that plots the means and runs Welch's
+#      test, then execute it.
+
+# Run a Trash tool: the globally installed command, or the repo-local tool
+# through `dotnet <tool> -- <args>` when $local is set.
+# Arguments: $1 - tool name, remaining arguments are passed to the tool.
+run_trash() {
+    if [ "$local" == "" ]
+    then
+        "$1" "${@:2}"
+    else
+        dotnet "$1" -- "${@:2}"
+    fi
+}
+
+echo Test each grammar and PR in turn.
+for g in ${tests[@]}
+do
+    echo Grammar $g
+    rm -f "$cwd"/p[0-1]-*.txt
+    gg=$(echo "$g" | tr '/' '-')
+    what=()
+    for ((i=0; i<${#prs[@]}; i++))
+    do
+        pushd "$cwd/${prs[$i]}/$gg"
+        generated=( Generated-CSharp* )
+        where=${generated[0]}
+        echo $where
+        cd "$where"
+        if [ "${#what[@]}" -eq 0 ]
+        then
+            # desc.xml may name the inputs; fall back to the examples directory.
+            inputs=$(run_trash trxml2 desc.xml | grep inputs | head -1 | sed 's%^[^=]*=%%')
+            if [ "$inputs" == "" ]; then inputs=examples; fi
+            # Path relative to this directory but pointing into the 'Before'
+            # copy. Both revisions' build directories sit at the same depth
+            # under $cwd, so the same relative paths work for either.
+            p=$(realpath -s --relative-to="$(pwd)" "$cwd/${prs[1]}/$gg/$inputs")
+            # $p is left unquoted as in the original call.
+            # '|| true': grep exits 1 when it selects nothing; an empty result
+            # is handled below rather than left to 'set -e'.
+            found=( $(run_trash trglob $p | grep -v -e '\.errors$' -e '\.tree$' || true) )
+            echo what = ${found[@]}
+            # Keep regular entries only; directories are not inputs.
+            what=()
+            for f in "${found[@]}"
+            do
+                if [ -d "$f" ]; then continue; fi
+                what+=( "$f" )
+            done
+            if [ ${#what[@]} -eq 0 ]
+            then
+                # Nothing to parse: skip the whole grammar, not just this
+                # revision, so Octave is never fed missing samples.
+                echo "No input files found for $g; skipping."
+                popd
+                continue 2
+            fi
+        fi
+        # Try first and scale number of times to work in 10 minutes tops.
+        # Format is in seconds, in floating point format.
+        runtime=$(bash run.sh "${what[@]}" 2>&1 | awk '/Total Time/ { t = $3 } END { print t }')
+        if [ "$runtime" == "" ]
+        then
+            echo "run.sh reported no 'Total Time' for $g (${prs[$i]})." >&2
+            exit 1
+        fi
+        # At most 40 runs, fewer if 40 would exceed 600 seconds, and never
+        # fewer than 2 because the statistics need more than one sample.
+        times=$(awk -v t="$runtime" 'BEGIN { n = (t > 0) ? int(600 / t) : 40; if (n > 40) n = 40; if (n < 2) n = 2; print n }')
+        for ((j=1; j<=times; j++))
+        do
+            bash run.sh "${what[@]}" 2>&1 | grep "Total Time" | awk '{print $3}' >> "$cwd/p$i-$gg.txt"
+        done
+        popd
+    done
+
+    echo Graphing out.
+    cd "$cwd"
+    # Build the Octave script in one redirected group. For each revision <i> it
+    # defines p<i> (samples), mp<i> (mean) and sd<i> (standard deviation).
+    {
+        echo "pkg load statistics"
+        xs=""
+        labels=""
+        means=""
+        devs=""
+        for ((i=0; i<${#prs[@]}; i++))
+        do
+            # Word splitting turns the one-value-per-line file into a list.
+            samples=( $(< "$cwd/p$i-$gg.txt") )
+            cat <<EOF
+p$i=[${samples[*]}];
+mp$i=mean(p$i);
+sd$i=std(p$i);
+printf('disp($i)\n');
+disp($i);
+printf('disp(p$i)\n');
+disp(p$i);
+printf('mp$i = %f\n', mp$i);
+printf('sd$i = %f\n', sd$i);
+EOF
+            xs="$xs $((i+1))"
+            if [ "$i" != "0" ]; then labels="$labels; "; fi
+            labels="$labels 'PR${prs[$i]}'"
+            means="$means mp$i"
+            devs="$devs sd$i"
+        done
+        echo "x = [$xs];"
+        echo "str = [ $labels ];"
+        echo "data = [$means ];"
+        echo "errhigh = [$devs ];"
+        echo "errlow = [$devs ];"
+        # Bar chart with error bars, then the verdict: slower only if Welch's
+        # test gives p < 0.03 and the 'After' mean is more than 5% above the
+        # 'Before' mean (p0 is After, p1 is Before).
+        cat <<EOF
+        bar(x,data);
+        set(gca, 'XTickLabel', str, 'XTick', 1:numel(x));
+        hold on
+        er = errorbar(x,data,errlow,errhigh);
+        hold off
+        set(er, "color", [0 0 0])
+        set(er, "linewidth", 3);
+        set(er, "linestyle", "none");
+        set(gca, "fontsize", 6)
+        xlabel("Target");
+        ylabel("Runtime (s)");
+        title("Comparison of Runtimes")
+        print("./times-$gg.svg", "-dsvg")
+        [pval, t, df] = welch_test(p0, p1)
+        if (abs(pval) < 0.03 && mp0/mp1 > 1.05)
+          printf("The PR statistically and practically decreased performance for $gg.\n");
+        else
+          printf("The PR did not signficantly negatively alter performance for $gg.\n");
+        endif
+EOF
+    } > xx.m
+    # Echo the generated program for verification, then run it.
+    echo ========
+    cat xx.m
+    echo ========
+    octave --no-gui < xx.m
+
+done
diff --git a/sql/plsql/PlSqlLexer.g4 b/sql/plsql/PlSqlLexer.g4
@@ -2363,16 +2363,16 @@ LEAST            : 'LEAST';
 GREATEST         : 'GREATEST';
 TO_DATE          : 'TO_DATE';
 
-CHARSETID        : 'CHARSETID';
-CHARSETFORM      : 'CHARSETFORM';
-DURATION         : 'DURATION';
-EXTEND           : 'EXTEND';
-MAXLEN           : 'MAXLEN';
-PERSISTABLE      : 'PERSISTABLE';
-POLYMORPHIC      : 'POLYMORPHIC';
-STRUCT           : 'STRUCT';
-TDO              : 'TDO';
-WM_CONCAT        : 'WM_CONCAT';
+CHARSETID   : 'CHARSETID';
+CHARSETFORM : 'CHARSETFORM';
+DURATION    : 'DURATION';
+EXTEND      : 'EXTEND';
+MAXLEN      : 'MAXLEN';
+PERSISTABLE : 'PERSISTABLE';
+POLYMORPHIC : 'POLYMORPHIC';
+STRUCT      : 'STRUCT';
+TDO         : 'TDO';
+WM_CONCAT   : 'WM_CONCAT';
 
 // Rule #358 <NATIONAL_CHAR_STRING_LIT> - subtoken typecast in <REGULAR_ID>, it also incorporates <character_representation>
 //  Lowercase 'n' is a usual addition to the standard