Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 117 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1829,6 +1829,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FCANONICALIZE, VT, Custom);
}

setOperationAction(ISD::LRINT, MVT::v16f32,
Subtarget.hasDQI() ? Legal : Custom);
setOperationAction(ISD::LRINT, MVT::v8f64,
Expand Down Expand Up @@ -2101,6 +2102,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// These operations are handled on non-VLX by artificially widening in
// isel patterns.

for (MVT VT : {MVT::f16, MVT::f32, MVT::f64, MVT::v8f16, MVT::v4f32,
MVT::v2f64, MVT::v16f16, MVT::v8f32, MVT::v4f64, MVT::v32f16,
MVT::v16f32, MVT::v8f64})
setOperationAction(ISD::FLDEXP, VT, Custom);

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
Expand Down Expand Up @@ -19149,6 +19155,115 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return SDValue();
}

static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue X = Op.getOperand(0);
MVT XTy = X.getSimpleValueType();
SDValue Exp = Op.getOperand(1);

switch (XTy.SimpleTy) {
default:
return SDValue();
case MVT::f16:
if (!Subtarget.hasFP16()) {
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X);
}
break;
case MVT::f32:
case MVT::f64:
break;
case MVT::v4f32:
case MVT::v2f64:
if (Subtarget.hasVLX()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEFS, DL, XTy, X, Exp, X);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why SCALEFS?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mistake on my part, should be SCALEF.

}
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
break;
case MVT::v8f16:
if (Subtarget.hasFP16()) {
if (Subtarget.hasVLX()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
}
break;
}
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::v8f32, X);
Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v8i32, Exp);
break;
case MVT::v8f32:
case MVT::v4f64:
if (Subtarget.hasVLX()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
}
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
break;
case MVT::v16f16:
if (Subtarget.hasFP16()) {
if (Subtarget.hasVLX()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
}
break;
}
X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::v16f32, X);
Exp = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::v16i32, Exp);
break;
case MVT::v16f32:
case MVT::v8f64:
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
case MVT::v32f16:
if (Subtarget.hasFP16()) {
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp, X);
}
SDValue Low = DAG.getExtractSubvector(DL, MVT::v16f16, X, 0);
SDValue High = DAG.getExtractSubvector(DL, MVT::v16f16, X, 16);
SDValue ExpLow = DAG.getExtractSubvector(DL, MVT::v16i16, Exp, 0);
SDValue ExpHigh = DAG.getExtractSubvector(DL, MVT::v16i16, Exp, 16);

SDValue OpLow = DAG.getNode(ISD::FLDEXP, DL, MVT::v16f16, Low, ExpLow);
SDValue OpHigh = DAG.getNode(ISD::FLDEXP, DL, MVT::v16f16, High, ExpHigh);
SDValue ScaledLow = LowerFLDEXP(OpLow, Subtarget, DAG);
SDValue ScaledHigh = LowerFLDEXP(OpHigh, Subtarget, DAG);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v32f16, ScaledLow,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You might be able to use splitVectorOp here?

ScaledHigh);
}

if (XTy.isVector()) {
SDValue WideX = widenSubVector(X, true, Subtarget, DAG, DL, 512);
SDValue WideExp = widenSubVector(Exp, true, Subtarget, DAG, DL, 512);
if (XTy.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) {
WideExp =
DAG.getNode(ISD::SINT_TO_FP, DL, WideX.getSimpleValueType(), WideExp);
SDValue Scalef = DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(),
WideX, WideExp, WideX);
MVT ExtractVT = XTy == MVT::v8f16 ? MVT::v8f32 : MVT::v16f32;
SDValue LowHalf = DAG.getExtractSubvector(DL, ExtractVT, Scalef, 0);
return DAG.getNode(ISD::FP_ROUND, DL, XTy, LowHalf,
DAG.getTargetConstant(0, DL, MVT::i32));
}
SDValue Scalef = DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(),
WideX, WideExp, WideX);
return DAG.getExtractSubvector(DL, XTy, Scalef, 0);
} else {
MVT VT = MVT::getVectorVT(X.getSimpleValueType(),
128 / X.getSimpleValueType().getSizeInBits());
Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
SDValue VX = DAG.getInsertVectorElt(DL, DAG.getUNDEF(VT), X, 0);
SDValue VExp = DAG.getInsertVectorElt(DL, DAG.getUNDEF(VT), Exp, 0);
SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, VT, VX, VExp, VX);
SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
if (X.getValueType() != XTy)
Final = DAG.getNode(ISD::FP_ROUND, DL, XTy, Final,
DAG.getTargetConstant(0, DL, MVT::i32));
return Final;
}
}

static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
Expand Down Expand Up @@ -33681,7 +33796,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
// clang-format on
case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
// clang-format on
}
}

Expand Down
159 changes: 54 additions & 105 deletions llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -79,38 +79,54 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: subq $40, %rsp
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT: vmovd %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-AVX-NEXT: addq $40, %rsp
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX-NEXT: retq
; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: subq $40, %rsp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-AVX2-NEXT: addq $40, %rsp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-ONLY-AVX512F: # %bb.0:
; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
; CHECK-ONLY-AVX512F-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-ONLY-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
; CHECK-ONLY-AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-ONLY-AVX512F-NEXT: vzeroupper
; CHECK-ONLY-AVX512F-NEXT: retq
;
; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
; CHECK-SKX-NEXT: vscalefss %xmm0, %xmm1, %xmm0
; CHECK-SKX-NEXT: retq
%r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
ret <4 x float> %r
}
Expand Down Expand Up @@ -562,79 +578,12 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
;
; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
; CHECK-AVX512F: # %bb.0:
; CHECK-AVX512F-NEXT: subq $72, %rsp
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-AVX512F-NEXT: addq $72, %rsp
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
; CHECK-AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
; CHECK-AVX512F-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
; CHECK-AVX512F-NEXT: vzeroupper
; CHECK-AVX512F-NEXT: retq
%r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
ret <8 x half> %r
Expand Down
Loading
Loading