Skip to content
This repository has been archived by the owner on Jan 20, 2024. It is now read-only.

Commit

Permalink
AMDGPU: Make v32bf16 a legal type (#76679)
Browse files Browse the repository at this point in the history
Depends on #76678
  • Loading branch information
arsenm authored Jan 9, 2024
1 parent 25e0dc9 commit f9fec40
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 2 deletions.
5 changes: 5 additions & 0 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v16bf16, &AMDGPU::SGPR_256RegClass);
addRegisterClass(MVT::v32i16, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v32f16, &AMDGPU::SGPR_512RegClass);
addRegisterClass(MVT::v32bf16, &AMDGPU::SGPR_512RegClass);
}

addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
Expand Down Expand Up @@ -719,11 +720,15 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::LOAD, MVT::v32i16, MVT::v16i32);
setOperationAction(ISD::LOAD, MVT::v32f16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v32f16, MVT::v16i32);
setOperationAction(ISD::LOAD, MVT::v32bf16, Promote);
AddPromotedToType(ISD::LOAD, MVT::v32bf16, MVT::v16i32);

setOperationAction(ISD::STORE, MVT::v32i16, Promote);
AddPromotedToType(ISD::STORE, MVT::v32i16, MVT::v16i32);
setOperationAction(ISD::STORE, MVT::v32f16, Promote);
AddPromotedToType(ISD::STORE, MVT::v32f16, MVT::v16i32);
setOperationAction(ISD::STORE, MVT::v32bf16, Promote);
AddPromotedToType(ISD::STORE, MVT::v32bf16, MVT::v16i32);

setOperationAction({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND},
MVT::v2i32, Expand);
Expand Down
32 changes: 32 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1801,6 +1801,38 @@ def : BitConvert <v8f64, v16f32, VReg_512>;
def : BitConvert <v16f32, v8i64, VReg_512>;
def : BitConvert <v16f32, v8f64, VReg_512>;



// 512-bit bitcast: v32bf16
// Each pairing below is declared in both directions, once with VReg_512 and
// once with SReg_512 as the third BitConvert argument.
def : BitConvert <v32bf16, v32i16, VReg_512>;
def : BitConvert <v32i16, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v32i16, SReg_512>;
def : BitConvert <v32i16, v32bf16, SReg_512>;

def : BitConvert <v32bf16, v32f16, VReg_512>;
def : BitConvert <v32f16, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v32f16, SReg_512>;
def : BitConvert <v32f16, v32bf16, SReg_512>;

def : BitConvert <v32bf16, v16i32, VReg_512>;
def : BitConvert <v16i32, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v16i32, SReg_512>;
def : BitConvert <v16i32, v32bf16, SReg_512>;

def : BitConvert <v32bf16, v16f32, VReg_512>;
def : BitConvert <v16f32, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v16f32, SReg_512>;
def : BitConvert <v16f32, v32bf16, SReg_512>;

def : BitConvert <v32bf16, v8f64, VReg_512>;
def : BitConvert <v8f64, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v8f64, SReg_512>;
def : BitConvert <v8f64, v32bf16, SReg_512>;

def : BitConvert <v32bf16, v8i64, VReg_512>;
def : BitConvert <v8i64, v32bf16, VReg_512>;
def : BitConvert <v32bf16, v8i64, SReg_512>;
def : BitConvert <v8i64, v32bf16, SReg_512>;

// 1024-bit bitcast
def : BitConvert <v32i32, v32f32, VReg_1024>;
def : BitConvert <v32f32, v32i32, VReg_1024>;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
defm "" : SRegClass<12, [v12i32, v12f32], SGPR_384Regs, TTMP_384Regs>;

let GlobalPriority = true in {
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], SGPR_512Regs, TTMP_512Regs>;
defm "" : SRegClass<32, [v32i32, v32f32, v16i64, v16f64], SGPR_1024Regs>;
}

Expand Down Expand Up @@ -970,7 +970,7 @@ defm VReg_352 : VRegClass<11, [v11i32, v11f32], (add VGPR_352)>;
defm VReg_384 : VRegClass<12, [v12i32, v12f32], (add VGPR_384)>;

let GlobalPriority = true in {
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16], (add VGPR_512)>;
defm VReg_512 : VRegClass<16, [v16i32, v16f32, v8i64, v8f64, v32i16, v32f16, v32bf16], (add VGPR_512)>;
defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)>;
}

Expand Down

0 comments on commit f9fec40

Please sign in to comment.