[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 04/47] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VN
From: |
Peter Maydell |
Subject: |
[PULL 04/47] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL |
Date: |
Tue, 1 Sep 2020 16:17:40 +0100 |
Implement fp16 versions of the VFP VMLA, VMLS, VNMLS, VNMLA, VNMUL
instructions. (These are all the remaining ones which we implement
via do_vfp_3op_[hsd]p().)
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20200828183354.27913-5-peter.maydell@linaro.org
---
target/arm/helper.h | 1 +
target/arm/vfp.decode | 5 ++
target/arm/vfp_helper.c | 5 ++
target/arm/translate-vfp.c.inc | 84 ++++++++++++++++++++++++++++++++++
4 files changed, 95 insertions(+)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 61e4e938861..58f9c4e933e 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -125,6 +125,7 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr)
DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr)
DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr)
DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr)
+DEF_HELPER_1(vfp_negh, f16, f16)
DEF_HELPER_1(vfp_negs, f32, f32)
DEF_HELPER_1(vfp_negd, f64, f64)
DEF_HELPER_1(vfp_abss, f32, f32)
diff --git a/target/arm/vfp.decode b/target/arm/vfp.decode
index 1ecd5e28ca0..e5545076a51 100644
--- a/target/arm/vfp.decode
+++ b/target/arm/vfp.decode
@@ -103,15 +103,19 @@ VLDM_VSTM_dp ---- 1101 0.1 l:1 rn:4 .... 1011 imm:8 \
vd=%vd_dp p=1 u=0 w=1
# 3-register VFP data-processing; bits [23,21:20,6] identify the operation.
+VMLA_hp ---- 1110 0.00 .... .... 1001 .0.0 .... @vfp_dnm_s
VMLA_sp ---- 1110 0.00 .... .... 1010 .0.0 .... @vfp_dnm_s
VMLA_dp ---- 1110 0.00 .... .... 1011 .0.0 .... @vfp_dnm_d
+VMLS_hp ---- 1110 0.00 .... .... 1001 .1.0 .... @vfp_dnm_s
VMLS_sp ---- 1110 0.00 .... .... 1010 .1.0 .... @vfp_dnm_s
VMLS_dp ---- 1110 0.00 .... .... 1011 .1.0 .... @vfp_dnm_d
+VNMLS_hp ---- 1110 0.01 .... .... 1001 .0.0 .... @vfp_dnm_s
VNMLS_sp ---- 1110 0.01 .... .... 1010 .0.0 .... @vfp_dnm_s
VNMLS_dp ---- 1110 0.01 .... .... 1011 .0.0 .... @vfp_dnm_d
+VNMLA_hp ---- 1110 0.01 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMLA_sp ---- 1110 0.01 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMLA_dp ---- 1110 0.01 .... .... 1011 .1.0 .... @vfp_dnm_d
@@ -119,6 +123,7 @@ VMUL_hp ---- 1110 0.10 .... .... 1001 .0.0 ....
@vfp_dnm_s
VMUL_sp ---- 1110 0.10 .... .... 1010 .0.0 .... @vfp_dnm_s
VMUL_dp ---- 1110 0.10 .... .... 1011 .0.0 .... @vfp_dnm_d
+VNMUL_hp ---- 1110 0.10 .... .... 1001 .1.0 .... @vfp_dnm_s
VNMUL_sp ---- 1110 0.10 .... .... 1010 .1.0 .... @vfp_dnm_s
VNMUL_dp ---- 1110 0.10 .... .... 1011 .1.0 .... @vfp_dnm_d
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index b8ca744bccc..f93ddf0b208 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -261,6 +261,11 @@ VFP_BINOP(minnum)
VFP_BINOP(maxnum)
#undef VFP_BINOP
+dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a)
+{
+ return float16_chs(a);
+}
+
float32 VFP_HELPER(neg, s)(float32 a)
{
return float32_chs(a);
diff --git a/target/arm/translate-vfp.c.inc b/target/arm/translate-vfp.c.inc
index 01a5fd65115..15bb23688bd 100644
--- a/target/arm/translate-vfp.c.inc
+++ b/target/arm/translate-vfp.c.inc
@@ -1547,6 +1547,21 @@ static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn
*fn, int vd, int vm)
return true;
}
+static void gen_VMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* Note that order of inputs to the add matters for NaNs */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLA_hp(DisasContext *s, arg_VMLA_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* Note that order of inputs to the add matters for NaNs */
@@ -1577,6 +1592,25 @@ static bool trans_VMLA_dp(DisasContext *s, arg_VMLA_dp
*a)
return do_vfp_3op_dp(s, gen_VMLA_dp, a->vd, a->vn, a->vm, true);
}
+static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VMLS: vd = vd + -(vn * vm)
+ * Note that order of inputs to the add matters for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(tmp, tmp);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VMLS_hp(DisasContext *s, arg_VMLS_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@@ -1615,6 +1649,27 @@ static bool trans_VMLS_dp(DisasContext *s, arg_VMLS_dp
*a)
return do_vfp_3op_dp(s, gen_VMLS_dp, a->vd, a->vn, a->vm, true);
}
+static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /*
+ * VNMLS: -fd + (fn * fm)
+ * Note that it isn't valid to replace (-A + B) with (B - A) or similar
+ * plausible looking simplifications because this will give wrong results
+ * for NaNs.
+ */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(vd, vd);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLS_hp(DisasContext *s, arg_VNMLS_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMLS_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/*
@@ -1657,6 +1712,23 @@ static bool trans_VNMLS_dp(DisasContext *s, arg_VNMLS_dp
*a)
return do_vfp_3op_dp(s, gen_VNMLS_dp, a->vd, a->vn, a->vm, true);
}
+static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMLA: -fd + -(fn * fm) */
+ TCGv_i32 tmp = tcg_temp_new_i32();
+
+ gen_helper_vfp_mulh(tmp, vn, vm, fpst);
+ gen_helper_vfp_negh(tmp, tmp);
+ gen_helper_vfp_negh(vd, vd);
+ gen_helper_vfp_addh(vd, vd, tmp, fpst);
+ tcg_temp_free_i32(tmp);
+}
+
+static bool trans_VNMLA_hp(DisasContext *s, arg_VNMLA_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMLA_hp, a->vd, a->vn, a->vm, true);
+}
+
static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMLA: -fd + -(fn * fm) */
@@ -1706,6 +1778,18 @@ static bool trans_VMUL_dp(DisasContext *s, arg_VMUL_dp
*a)
return do_vfp_3op_dp(s, gen_helper_vfp_muld, a->vd, a->vn, a->vm, false);
}
+static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
+{
+ /* VNMUL: -(fn * fm) */
+ gen_helper_vfp_mulh(vd, vn, vm, fpst);
+ gen_helper_vfp_negh(vd, vd);
+}
+
+static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a)
+{
+ return do_vfp_3op_hp(s, gen_VNMUL_hp, a->vd, a->vn, a->vm, false);
+}
+
static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* VNMUL: -(fn * fm) */
--
2.20.1
- [PULL 00/47] target-arm queue, Peter Maydell, 2020/09/01
- [PULL 01/47] target/arm: Remove local definitions of float constants, Peter Maydell, 2020/09/01
- [PULL 02/47] target/arm: Use correct ID register check for aa32_fp16_arith, Peter Maydell, 2020/09/01
- [PULL 03/47] target/arm: Implement VFP fp16 for VFP_BINOP operations, Peter Maydell, 2020/09/01
- [PULL 05/47] target/arm: Macroify trans functions for VFMA, VFMS, VFNMA, VFNMS, Peter Maydell, 2020/09/01
- [PULL 04/47] target/arm: Implement VFP fp16 VMLA, VMLS, VNMLS, VNMLA, VNMUL,
Peter Maydell <=
- [PULL 06/47] target/arm: Implement VFP fp16 for fused-multiply-add, Peter Maydell, 2020/09/01
- [PULL 08/47] target/arm: Implement VFP fp16 for VABS, VNEG, VSQRT, Peter Maydell, 2020/09/01
- [PULL 07/47] target/arm: Macroify uses of do_vfp_2op_sp() and do_vfp_2op_dp(), Peter Maydell, 2020/09/01
- [PULL 09/47] target/arm: Implement VFP fp16 for VMOV immediate, Peter Maydell, 2020/09/01
- [PULL 10/47] target/arm: Implement VFP fp16 VCMP, Peter Maydell, 2020/09/01
- [PULL 12/47] target/arm: Implement VFP fp16 VCVT between float and integer, Peter Maydell, 2020/09/01
- [PULL 11/47] target/arm: Implement VFP fp16 VLDR and VSTR, Peter Maydell, 2020/09/01
- [PULL 13/47] target/arm: Make VFP_CONV_FIX macros take separate float type and float size, Peter Maydell, 2020/09/01
- [PULL 15/47] target/arm: Implement VFP fp16 VCVT between float and fixed-point, Peter Maydell, 2020/09/01
- [PULL 14/47] target/arm: Use macros instead of open-coding fp16 conversion helpers, Peter Maydell, 2020/09/01