Skip to content

Commit

Permalink
Merge of 94c0b26
Browse files Browse the repository at this point in the history
  • Loading branch information
nstester committed Oct 30, 2023
2 parents 4f06620 + 94c0b26 commit 4a58f9c
Show file tree
Hide file tree
Showing 19 changed files with 1,097 additions and 49 deletions.
6 changes: 6 additions & 0 deletions gcc/common/config/i386/cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,12 @@ get_zhaoxin_cpu (struct __processor_model *cpu_model,
reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C);
cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
}
else if (model >= 0x5b)
{
cpu = "yongfeng";
CHECK___builtin_cpu_is ("yongfeng");
cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_YONGFENG;
}
break;
default:
break;
Expand Down
10 changes: 5 additions & 5 deletions gcc/common/config/i386/i386-common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2098,6 +2098,7 @@ const char *const processor_names[] =
"pantherlake",
"intel",
"lujiazui",
"yongfeng",
"geode",
"k6",
"athlon",
Expand Down Expand Up @@ -2305,12 +2306,11 @@ const pta processor_alias_table[] =
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR, 0, P_NONE},
{"lujiazui", PROCESSOR_LUJIAZUI, CPU_LUJIAZUI,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2
| PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
| PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED | PTA_POPCNT,
PTA_LUJIAZUI,
M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_NONE},
{"yongfeng", PROCESSOR_YONGFENG, CPU_YONGFENG,
PTA_YONGFENG,
M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_NONE},
{"k8", PROCESSOR_K8, CPU_K8,
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, 0, P_NONE},
Expand Down
1 change: 1 addition & 0 deletions gcc/common/config/i386/i386-cpuinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ enum processor_subtypes
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
ZHAOXIN_FAM7H_YONGFENG,
CPU_SUBTYPE_MAX
};

Expand Down
12 changes: 10 additions & 2 deletions gcc/config.gcc
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
nano-x2 eden-x4 nano-x4 lujiazui x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
nano-x2 eden-x4 nano-x4 lujiazui yongfeng x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
sierraforest graniterapids graniterapids-d grandridge arrowlake arrowlake-s \
clearwaterforest pantherlake native"

Expand Down Expand Up @@ -3811,6 +3811,10 @@ case ${target} in
arch=lujiazui
cpu=lujiazui
;;
yongfeng-*)
arch=yongfeng
cpu=yongfeng
;;
pentium2-*)
arch=pentium2
cpu=pentium2
Expand Down Expand Up @@ -3924,10 +3928,14 @@ case ${target} in
arch=k8
cpu=k8
;;
lujiazui-*)
lujiazui-*)
arch=lujiazui
cpu=lujiazui
;;
yongfeng-*)
arch=yongfeng
cpu=yongfeng
;;
nocona-*)
arch=nocona
cpu=nocona
Expand Down
5 changes: 5 additions & 0 deletions gcc/config/i386/driver-i386.cc
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case 7:
if (model == 0x3b)
processor = PROCESSOR_LUJIAZUI;
else if (model >= 0x5b)
processor = PROCESSOR_YONGFENG;
break;
default:
break;
Expand Down Expand Up @@ -817,6 +819,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
case PROCESSOR_LUJIAZUI:
cpu = "lujiazui";
break;
case PROCESSOR_YONGFENG:
cpu = "yongfeng";
break;

default:
/* Use something reasonable. */
Expand Down
7 changes: 7 additions & 0 deletions gcc/config/i386/i386-c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__lujiazui");
def_or_undef (parse_in, "__lujiazui__");
break;
case PROCESSOR_YONGFENG:
def_or_undef (parse_in, "__yongfeng");
def_or_undef (parse_in, "__yongfeng__");
break;
case PROCESSOR_PENTIUM4:
def_or_undef (parse_in, "__pentium4");
def_or_undef (parse_in, "__pentium4__");
Expand Down Expand Up @@ -379,6 +383,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_LUJIAZUI:
def_or_undef (parse_in, "__tune_lujiazui__");
break;
case PROCESSOR_YONGFENG:
def_or_undef (parse_in, "__tune_yongfeng__");
break;
case PROCESSOR_PENTIUM4:
def_or_undef (parse_in, "__tune_pentium4__");
break;
Expand Down
3 changes: 3 additions & 0 deletions gcc/config/i386/i386-options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ along with GCC; see the file COPYING3. If not see
| m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)

/* Processor bitmasks for the ZHAOXIN CPUs.  m_LUJIAZUI and m_YONGFENG each
   set the single bit whose position is the matching processor_type
   enumerator; m_ZHAOXIN is the union of the two.  */
#define m_LUJIAZUI (HOST_WIDE_INT_1U<<PROCESSOR_LUJIAZUI)
#define m_YONGFENG (HOST_WIDE_INT_1U<<PROCESSOR_YONGFENG)
#define m_ZHAOXIN (m_LUJIAZUI | m_YONGFENG)

#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
#define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
Expand Down Expand Up @@ -792,6 +794,7 @@ static const struct processor_costs *processor_cost_table[] =
&alderlake_cost,
&intel_cost,
&lujiazui_cost,
&yongfeng_cost,
&geode_cost,
&k6_cost,
&athlon_cost,
Expand Down
9 changes: 9 additions & 0 deletions gcc/config/i386/i386.h
Original file line number Diff line number Diff line change
Expand Up @@ -2297,6 +2297,7 @@ enum processor_type
PROCESSOR_PANTHERLAKE,
PROCESSOR_INTEL,
PROCESSOR_LUJIAZUI,
PROCESSOR_YONGFENG,
PROCESSOR_GEODE,
PROCESSOR_K6,
PROCESSOR_ATHLON,
Expand Down Expand Up @@ -2435,6 +2436,14 @@ constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
| PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
| PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;

/* Feature flags of the ZHAOXIN lujiazui processor.  Used by the "lujiazui"
   entry of processor_alias_table and as the base of PTA_YONGFENG.  */
constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
| PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
| PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED | PTA_POPCNT;

/* Feature flags of the ZHAOXIN yongfeng processor: everything in
   PTA_LUJIAZUI plus AVX, AVX2, F16C, FMA, SHA and LZCNT.  Used by the
   "yongfeng" entry of processor_alias_table.  */
constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2 | PTA_F16C
| PTA_FMA | PTA_SHA | PTA_LZCNT;

#ifndef GENERATOR_FILE

#include "insn-attr-common.h"
Expand Down
3 changes: 2 additions & 1 deletion gcc/config/i386/i386.md
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@

;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
(const (symbol_ref "ix86_schedule")))

Expand Down Expand Up @@ -1382,6 +1382,7 @@
(include "core2.md")
(include "haswell.md")
(include "lujiazui.md")
(include "yongfeng.md")


;; Operand and operator predicates and constraints
Expand Down
2 changes: 1 addition & 1 deletion gcc/config/i386/lujiazui.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
(define_insn_reservation "lua_lea" 1
(and (eq_attr "cpu" "lujiazui")
(eq_attr "type" "lea"))
"hsw_decodern,lua_p45")
"lua_decodern,lua_p45")

(define_insn_reservation "lua_shift_rotate" 1
(and (eq_attr "cpu" "lujiazui")
Expand Down
116 changes: 116 additions & 0 deletions gcc/config/i386/x86-tune-costs.h
Original file line number Diff line number Diff line change
Expand Up @@ -3393,6 +3393,122 @@ struct processor_costs lujiazui_cost = {
2, /* Small unroll factor. */
};

/* yongfeng_cost should produce code tuned for ZHAOXIN yongfeng CPU. */
/* memcpy table referenced by yongfeng_cost.  The array has two entries;
   each is written as {libcall, {triples...}} where the last triple uses -1
   as its first member.
   NOTE(review): presumably entry 0 applies to 32-bit and entry 1 to 64-bit
   code, and each triple is {max size, algorithm, noalign} as declared by
   struct stringop_algs -- confirm against i386.h.  */
static stringop_algs yongfeng_memcpy[2] = {
{libcall, {{6, unrolled_loop, true}, {256, unrolled_loop, false},
{-1, libcall, false}}},
{libcall, {{8, loop, false}, {512, unrolled_loop, false},
{-1, libcall, false}}}};
/* memset table referenced by yongfeng_cost.  The array has two entries;
   each is written as {libcall, {triples...}} where the last triple uses -1
   as its first member.
   NOTE(review): presumably entry 0 applies to 32-bit and entry 1 to 64-bit
   code, and each triple is {max size, algorithm, noalign} as declared by
   struct stringop_algs -- confirm against i386.h.  */
static stringop_algs yongfeng_memset[2] = {
{libcall, {{6, loop_1_byte, false}, {128, loop, false},
{-1, libcall, false}}},
{libcall, {{2, rep_prefix_4_byte, false}, {64, loop, false},
{1024, vector_loop, false},
{-1, libcall, false}}}};
/* Cost table for the ZHAOXIN yongfeng CPU.  The initializer is positional:
   the first brace group holds the register allocator costs and the
   remaining entries follow in the order described by the per-entry
   comments, so entries must not be reordered.  It references the
   yongfeng_memcpy and yongfeng_memset tables.  */
static const
struct processor_costs yongfeng_cost = {
{
/* Start of register allocator costs. integer->integer move cost is 2. */
8, /* cost for loading QImode using movzbl. */
{8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 8}, /* cost of storing integer registers. */
2, /* cost of reg,reg fld/fst. */
{8, 8, 8}, /* cost of loading fp registers
in SFmode, DFmode and XFmode. */
{8, 8, 8}, /* cost of storing fp registers
in SFmode, DFmode and XFmode. */
2, /* cost of moving MMX register. */
{8, 8}, /* cost of loading MMX registers
in SImode and DImode. */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
{8, 8, 8, 10, 15}, /* cost of loading SSE registers
in 32,64,128,256 and 512-bit. */
{8, 8, 8, 10, 15}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
8, 8, /* SSE->integer and integer->SSE moves. */
8, 8, /* mask->integer and integer->mask moves. */
{8, 8, 8}, /* cost of loading mask register
in QImode, HImode, SImode. */
{8, 8, 8}, /* cost of storing mask register
in QImode, HImode, SImode. */
2, /* cost of moving mask register. */
/* End of register allocator costs. */
},

COSTS_N_INSNS (1), /* cost of an add instruction. */
COSTS_N_INSNS (1), /* cost of a lea instruction. */
COSTS_N_INSNS (1), /* variable shift costs. */
COSTS_N_INSNS (1), /* constant shift costs. */
{COSTS_N_INSNS (2), /* cost of starting multiply for QI. */
COSTS_N_INSNS (3), /* HI. */
COSTS_N_INSNS (2), /* SI. */
COSTS_N_INSNS (2), /* DI. */
COSTS_N_INSNS (3)}, /* other. */
0, /* cost of multiply per each bit set. */
{COSTS_N_INSNS (8), /* cost of a divide/mod for QI. */
COSTS_N_INSNS (9), /* HI. */
COSTS_N_INSNS (8), /* SI. */
COSTS_N_INSNS (41), /* DI. */
COSTS_N_INSNS (41)}, /* other. */
COSTS_N_INSNS (1), /* cost of movsx. */
COSTS_N_INSNS (1), /* cost of movzx. */
8, /* "large" insn. */
17, /* MOVE_RATIO. */
6, /* CLEAR_RATIO. */
{8, 8, 8}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{8, 8, 8}, /* cost of storing integer registers. */
{8, 8, 8, 12, 15}, /* cost of loading SSE register
in 32bit, 64bit, 128bit, 256bit and 512bit. */
{8, 8, 8, 12, 15}, /* cost of storing SSE register
in 32bit, 64bit, 128bit, 256bit and 512bit. */
{8, 8, 8, 12, 15}, /* cost of unaligned loads. */
{8, 8, 8, 12, 15}, /* cost of unaligned stores. */
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
8, /* cost of moving SSE register to integer. */
18, 6, /* Gather load static, per_elt. */
18, 6, /* Gather store static, per_elt. */
32, /* size of l1 cache. */
256, /* size of l2 cache. */
64, /* size of prefetch block. */
12, /* number of parallel prefetches. */
3, /* Branch cost. */
COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (3), /* cost of FMUL instruction. */
COSTS_N_INSNS (14), /* cost of FDIV instruction. */
COSTS_N_INSNS (1), /* cost of FABS instruction. */
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */

COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
yongfeng_memcpy,
yongfeng_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:11:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
4, /* Small unroll limit. */
2, /* Small unroll factor. */
};


/* Generic should produce code tuned for Core-i7 (and newer chips)
and btver1 (and newer chips). */

Expand Down
27 changes: 26 additions & 1 deletion gcc/config/i386/x86-tune-sched.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ ix86_issue_rate (void)
case PROCESSOR_CASCADELAKE:
case PROCESSOR_CANNONLAKE:
case PROCESSOR_ALDERLAKE:
case PROCESSOR_YONGFENG:
case PROCESSOR_GENERIC:
return 4;

Expand Down Expand Up @@ -384,7 +385,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,

case PROCESSOR_ATHLON:
case PROCESSOR_K8:
case PROCESSOR_LUJIAZUI:
memory = get_attr_memory (insn);

/* Show ability of reorder buffer to hide latency of load by executing
Expand Down Expand Up @@ -445,6 +445,31 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
}
break;

case PROCESSOR_YONGFENG:
/* The stack engine allows push and pop instructions to execute in parallel. */
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
&& (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
return 0;
/* FALLTHRU */

case PROCESSOR_LUJIAZUI:
memory = get_attr_memory (insn);

/* Show ability of reorder buffer to hide latency of load by executing
in parallel with previous instruction in case
previous instruction is not needed to compute the address. */
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
&& !ix86_agi_dependent (dep_insn, insn))
{
int loadcost = 4;

if (cost >= loadcost)
cost -= loadcost;
else
cost = 0;
}
break;

case PROCESSOR_CORE2:
case PROCESSOR_NEHALEM:
case PROCESSOR_SANDYBRIDGE:
Expand Down
Loading

0 comments on commit 4a58f9c

Please sign in to comment.