Skip to content

Commit

Permalink
Add ASIMD Small GEMM kernels
Browse files Browse the repository at this point in the history
These kernels are an experiment to see whether using ASIMD instead of SVE slightly improves performance on cores with 128-bit SVE vectors.
  • Loading branch information
Mousius committed Nov 4, 2024
1 parent 453b9e4 commit 2f251c1
Show file tree
Hide file tree
Showing 11 changed files with 4,150 additions and 16 deletions.
18 changes: 18 additions & 0 deletions kernel/arm64/KERNEL.ARMV8
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,15 @@ endif
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Single-precision small-matrix GEMM kernel sources, one pair per NT/NN/TT/TN
# variant. The plain and _B0 variable of each pair name the same ASIMD source.
# NOTE(review): presumably the _B0 build is selected inside the shared source by
# a preprocessor define -- confirm against the kernel build rules.
# NOTE(review): no SGEMM_SMALL_M_PERMIT is set in the visible part of this file
# (KERNEL.ARMV8SVE sets one) -- confirm these kernels are actually dispatched.
SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_asimd.c
SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_asimd.c
SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_asimd.c
SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_asimd.c
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_asimd.c
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_asimd.c
SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_asimd.c
SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_asimd.c

# Double-precision GEMM and TRMM kernel sources; the file names are built from
# the DGEMM_UNROLL_M and DGEMM_UNROLL_N values (MxN in the name).
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S

Expand Down Expand Up @@ -171,6 +180,15 @@ endif
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Double-precision small-matrix GEMM kernel sources, one pair per NT/NN/TT/TN
# variant. The plain and _B0 variable of each pair name the same ASIMD source.
# NOTE(review): presumably the _B0 build is selected inside the shared source by
# a preprocessor define -- confirm against the kernel build rules.
# NOTE(review): no DGEMM_SMALL_M_PERMIT is set in the visible part of this file
# (KERNEL.ARMV8SVE sets one) -- confirm these kernels are actually dispatched.
DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_asimd.c
DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_asimd.c
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_asimd.c
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_asimd.c
DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_asimd.c
DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_asimd.c
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_asimd.c
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_asimd.c

CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
Expand Down
50 changes: 34 additions & 16 deletions kernel/arm64/KERNEL.ARMV8SVE
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,14 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Small-matrix SGEMM: the permit source stays the SVE one.
# NOTE(review): the kernels chosen at the end of this file are ASIMD while the
# permit check is still gemm_small_kernel_permit_sve.c -- confirm its size
# thresholds suit the ASIMD kernels.
SGEMM_SMALL_M_PERMIT = gemm_small_kernel_permit_sve.c
SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_sve.c
SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_sve.c
SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_sve.c
SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_sve.c
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_sve.c
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_sve.c
SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_sve.c
SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_sve.c
# The SVE small-kernel selections below are commented out; the ASIMD sources
# assigned at the end of this file take their place.
# SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_sve.c
# SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_sve.c
# SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_sve.c
# SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_sve.c
# SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_sve.c
# SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_sve.c
# SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_sve.c
# SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_sve.c

# STRMM copy routines are taken from the trmm_*_sve_v1.c sources, whose names
# carry no precision prefix.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
Expand All @@ -163,14 +163,14 @@ DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

# Small-matrix DGEMM: the permit source stays the SVE one.
# NOTE(review): the kernels chosen at the end of this file are ASIMD while the
# permit check is still gemm_small_kernel_permit_sve.c -- confirm its size
# thresholds suit the ASIMD kernels.
DGEMM_SMALL_M_PERMIT = gemm_small_kernel_permit_sve.c
DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_sve.c
DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_sve.c
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_sve.c
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_sve.c
DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_sve.c
DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_sve.c
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_sve.c
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_sve.c
# The SVE small-kernel selections below are commented out; the ASIMD sources
# assigned at the end of this file take their place.
# DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_sve.c
# DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_sve.c
# DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_sve.c
# DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_sve.c
# DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_sve.c
# DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_sve.c
# DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_sve.c
# DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_sve.c

# DTRMM copy routines are taken from the trmm_*_sve_v1.c sources, whose names
# carry no precision prefix.
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
Expand Down Expand Up @@ -227,3 +227,21 @@ ZHEMMUTCOPY_M = zhemm_utcopy_sve.c

# ZSYMM upper and lower copy routines, taken from SVE sources.
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

# Small-matrix GEMM kernels: select the ASIMD sources in place of the SVE ones
# that are commented out earlier in this file. Each plain and _B0 variable pair
# names the same source file.
# NOTE(review): this file is included by KERNEL.NEOVERSEV2 and possibly by other
# SVE targets with wider vectors; the change is described as an experiment for
# 128-bit SVE cores -- confirm the override is intended for every includer.
SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_asimd.c
SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_asimd.c
SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_asimd.c
SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_asimd.c
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_asimd.c
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_asimd.c
SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_asimd.c
SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_asimd.c

# Double-precision counterparts of the assignments above.
DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_asimd.c
DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_asimd.c
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_asimd.c
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_asimd.c
DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_asimd.c
DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_asimd.c
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_asimd.c
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_asimd.c
18 changes: 18 additions & 0 deletions kernel/arm64/KERNEL.NEOVERSEV2
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
# Start from the shared SVE kernel selection, then pick the small-matrix GEMM
# kernels for this target below. Assignments after the include take precedence
# over those made by the included file.
include $(KERNELDIR)/KERNEL.ARMV8SVE

# Single-precision small-matrix GEMM kernels: ASIMD sources.
# NOTE(review): KERNEL.ARMV8SVE currently ends with identical assignments, so
# these repeat them; presumably kept so this target stays on ASIMD if the shared
# file goes back to the SVE kernels -- confirm.
SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_asimd.c
SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_asimd.c
SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_asimd.c
SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_asimd.c
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_asimd.c
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_asimd.c
SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_asimd.c
SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_asimd.c

# Double-precision small-matrix GEMM kernels: ASIMD sources.
DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_asimd.c
DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_asimd.c
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_asimd.c
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_asimd.c
DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_asimd.c
DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_asimd.c
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_asimd.c
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_asimd.c
Loading

0 comments on commit 2f251c1

Please sign in to comment.