Skip to content

Commit

Permalink
Merge pull request #4177 from martin-frbg/issue4176
Browse files Browse the repository at this point in the history
Fix ZAXPY calls with INCX=0 on pre-AVX x86_64 and add utest
  • Loading branch information
martin-frbg authored Aug 4, 2023
2 parents c2f4bdb + 862d06a commit ef23240
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 2 deletions.
4 changes: 2 additions & 2 deletions kernel/x86_64/zaxpy_sse2.S
Original file line number Diff line number Diff line change
Expand Up @@ -1418,10 +1418,10 @@
movq M, %rax
//If incx==0 && incy==0, avoid the unrolled loop and jump to the end.
cmpq $0, INCX
je .L58
jne .L59
cmpq $0, INCY
je .L58

.L59:
sarq $3, %rax
jle .L55

Expand Down
39 changes: 39 additions & 0 deletions utest/test_axpy.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,26 @@ CTEST(axpy,zaxpy_inc_0)
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
}
}

/*
 * ZAXPY with a zero stride on X (incX=0) and a unit stride on Y (incY=1).
 *
 * Arrays hold interleaved (real, imaginary) pairs, so N=4 complex elements
 * occupy 2*N doubles. The expected Y values correspond to adding
 * alpha * x[0] = (0.25 + 0.5i) * (1 + 3i) = -1.25 + 1.25i
 * to every one of the N complex entries of Y, while X stays untouched.
 */
CTEST(axpy,zaxpy_incx_0)
{
	blasint N = 4;
	blasint incX = 0;
	blasint incY = 1;
	blasint k;

	double alpha[2] = {0.25, 0.5};

	/* Operands handed to the routine (Y is updated in place). */
	double x_in[]    = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y_inout[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};

	/* Reference contents after the call. */
	double x_expected[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y_expected[] = {0.75, 5.25, 4.75, 9.25, 0.75, 5.25, 4.75, 9.25};

	/* Routine under test. */
	BLASFUNC(zaxpy)(&N, alpha, x_in, &incX, y_inout, &incY);

	/* Compare all 2*N scalar components of both vectors. */
	for (k = 0; k < 2 * N; ++k) {
		ASSERT_DBL_NEAR_TOL(x_expected[k], x_in[k], DOUBLE_EPS);
		ASSERT_DBL_NEAR_TOL(y_expected[k], y_inout[k], DOUBLE_EPS);
	}
}

#endif

#ifdef BUILD_SINGLE
Expand Down Expand Up @@ -116,5 +136,24 @@ CTEST(axpy,caxpy_inc_0)
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
}
}

/*
 * CAXPY with a zero stride on X (incX=0) and a unit stride on Y (incY=1).
 *
 * Arrays hold interleaved (real, imaginary) pairs, so N=4 complex elements
 * occupy 2*N floats. The expected Y values correspond to adding
 * alpha * x[0] = (0.25 + 0.5i) * (1 + 3i) = -1.25 + 1.25i
 * to every one of the N complex entries of Y, while X stays untouched.
 * The reference arrays are kept in double precision; each float result is
 * promoted to double when compared against them.
 */
CTEST(axpy,caxpy_incx_0)
{
	blasint N = 4;
	blasint incX = 0;
	blasint incY = 1;
	blasint k;

	float alpha[2] = {0.25, 0.5};

	/* Operands handed to the routine (Y is updated in place). */
	float x_in[]    = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	float y_inout[] = {2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0};

	/* Reference contents after the call. */
	double x_expected[] = {1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0};
	double y_expected[] = {0.75, 5.25, 4.75, 9.25, 0.75, 5.25, 4.75, 9.25};

	/* Routine under test. */
	BLASFUNC(caxpy)(&N, alpha, x_in, &incX, y_inout, &incY);

	/* Compare all 2*N scalar components of both vectors. */
	for (k = 0; k < 2 * N; ++k) {
		ASSERT_DBL_NEAR_TOL(x_expected[k], x_in[k], DOUBLE_EPS);
		ASSERT_DBL_NEAR_TOL(y_expected[k], y_inout[k], DOUBLE_EPS);
	}
}
#endif

0 comments on commit ef23240

Please sign in to comment.