; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK-NO_FP16
; RUN: llc -o - %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 -mattr=+avx512fp16 | FileCheck %s --check-prefixes=CHECK-WITH_FP16

; Note: We could check more configurations, but anything with software
; emulation of fp16 generates a ton of assembly code and is not particularly
; interesting.

;----------------------------------------
; i8 input
;----------------------------------------

; uint8_t to float.
; - Widen i8 to i32 with a zero-extension (movzbl).
; - Convert the i32 to float (vcvtsi2ss).
; Both run configurations are expected to produce the same code.
define float @uint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_val = uitofp i8 %int8 to float
    ret float %float_val
}

; Vector uint8_t to float.
; Vector counterpart of @uint8ToFloat: all sixteen i8 lanes are zero-extended
; to i32 (vpmovzxbd) and then converted to float (vcvtdq2ps).
; Both run configurations are expected to produce the same code.
define <16 x float> @vector_uint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_vec = uitofp <16 x i8> %int8 to <16 x float>
    ret <16 x float> %float_vec
}


; uint8_t to half.
;
; Without half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Go from i8 to i32: zext
; - Convert i32 to half directly
define half @uint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half, so name it %fp16 (not %fp32 as in the float tests).
    %fp16 = uitofp i8 %int8 to half
    ret half %fp16
}

; Vector uint8_t to half.
;
; Without half support:
; - Go from i8 to i32: zext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Go from i8 to i16: zext
; - Convert i16 to half
;
; Unlike the scalar version (@uint8ToHalf), the intermediate integer type is
; i16 rather than i32 when half support is available.
define <16 x half> @vector_uint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_uint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half vector, so name it %fp16 (not %fp32).
    %fp16 = uitofp <16 x i8> %int8 to <16 x half>
    ret <16 x half> %fp16
}

; Same as the uint8_t tests, but with the signed variants (sitofp).
; I.e., the i8 input is widened with sext instead of zext.

; int8_t to float.
; - Go from i8 to i32: sext (movsbl)
; - Convert i32 to float (vcvtsi2ss)
define float @sint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_val = sitofp i8 %int8 to float
    ret float %float_val
}

; Vector int8_t to float.
; Same as @sint8ToFloat but with vector types: the i8 lanes are sign-extended
; to i32 (vpmovsxbd) and then converted to float (vcvtdq2ps).
define <16 x float> @vector_sint8ToFloat(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_vec = sitofp <16 x i8> %int8 to <16 x float>
    ret <16 x float> %float_vec
}

; int8_t to half.
;
; Without half support:
; - Go from i8 to i32: sext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Go from i8 to i32: sext
; - Convert i32 to half directly
define half @sint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movsbl %dil, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half, so name it %fp16 (not %fp32 as in the float tests).
    %fp16 = sitofp i8 %int8 to half
    ret half %fp16
}

; Vector int8_t to half.
;
; Without half support:
; - Go from i8 to i32: sext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Go from i8 to i16: sext
; - Convert i16 to half
define <16 x half> @vector_sint8ToHalf(<16 x i8> %int8) {
; CHECK-NO_FP16-LABEL: vector_sint8ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxbd %xmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint8ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half vector, so name it %fp16 (not %fp32).
    %fp16 = sitofp <16 x i8> %int8 to <16 x half>
    ret <16 x half> %fp16
}


;----------------------------------------
; i16 input
;----------------------------------------

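; Similar lowering to the i8 tests, but with i16 as the input type.
; The one difference: with half support, the vector i16 to half conversions
; need no extension and convert directly (vcvtuw2ph / vcvtw2ph).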

; uint16_t to float.
; - Go from i16 to i32: zext (movzwl)
; - Convert i32 to float (vcvtsi2ss)
define float @uint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_val = uitofp i16 %int16 to float
    ret float %float_val
}

; Vector uint16_t to float.
; Same as @uint16ToFloat but with vector types: the i16 lanes are zero-extended
; to i32 (vpmovzxwd) and then converted to float (vcvtdq2ps).
define <16 x float> @vector_uint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_vec = uitofp <16 x i16> %int16 to <16 x float>
    ret <16 x float> %float_vec
}

; uint16_t to half.
;
; Without half support:
; - Go from i16 to i32: zext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Go from i16 to i32: zext
; - Convert i32 to half directly
define half @uint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movzwl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movzwl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half, so name it %fp16 (not %fp32 as in the float tests).
    %fp16 = uitofp i16 %int16 to half
    ret half %fp16
}

; Vector uint16_t to half.
;
; Without half support:
; - Go from i16 to i32: zext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Convert i16 to half directly with the unsigned conversion (vcvtuw2ph);
;   no extension is needed.
define <16 x half> @vector_uint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_uint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_uint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtuw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half vector, so name it %fp16 (not %fp32).
    %fp16 = uitofp <16 x i16> %int16 to <16 x half>
    ret <16 x half> %fp16
}

; int16_t to float.
; - Go from i16 to i32: sext (movswl)
; - Convert i32 to float (vcvtsi2ss)
define float @sint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_val = sitofp i16 %int16 to float
    ret float %float_val
}

; Vector int16_t to float.
; Same as @sint16ToFloat but with vector types: the i16 lanes are sign-extended
; to i32 (vpmovsxwd) and then converted to float (vcvtdq2ps).
define <16 x float> @vector_sint16ToFloat(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToFloat:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToFloat:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-WITH_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-WITH_FP16-NEXT:    retq
    %float_vec = sitofp <16 x i16> %int16 to <16 x float>
    ret <16 x float> %float_vec
}

; int16_t to half.
;
; Without half support:
; - Go from i16 to i32: sext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Go from i16 to i32: sext
; - Convert i32 to half directly
define half @sint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    movswl %di, %eax
; CHECK-NO_FP16-NEXT:    vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    movswl %di, %eax
; CHECK-WITH_FP16-NEXT:    vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half, so name it %fp16 (not %fp32 as in the float tests).
    %fp16 = sitofp i16 %int16 to half
    ret half %fp16
}

; Vector int16_t to half.
;
; Without half support:
; - Go from i16 to i32: sext
; - Convert i32 to float
; - Truncate float to half
;
; With half support:
; - Convert i16 to half directly (vcvtw2ph); no extension is needed.
define <16 x half> @vector_sint16ToHalf(<16 x i16> %int16) {
; CHECK-NO_FP16-LABEL: vector_sint16ToHalf:
; CHECK-NO_FP16:       # %bb.0:
; CHECK-NO_FP16-NEXT:    vpmovsxwd %ymm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NO_FP16-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; CHECK-NO_FP16-NEXT:    retq
;
; CHECK-WITH_FP16-LABEL: vector_sint16ToHalf:
; CHECK-WITH_FP16:       # %bb.0:
; CHECK-WITH_FP16-NEXT:    vcvtw2ph %ymm0, %ymm0
; CHECK-WITH_FP16-NEXT:    retq
    ; The result is a half vector, so name it %fp16 (not %fp32).
    %fp16 = sitofp <16 x i16> %int16 to <16 x half>
    ret <16 x half> %fp16
}
