[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 2/4] tcg-hppa: Finish the port.
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 2/4] tcg-hppa: Finish the port. |
Date: |
Thu, 8 Apr 2010 11:56:13 +0200 |
User-agent: |
Mutt/1.5.20 (2009-06-14) |
On Wed, Apr 07, 2010 at 04:56:43AM -0700, Richard Henderson wrote:
> Delete inline functions from tcg-target.h that don't need to be there,
> move the others to tcg-target.c. Add 'Z', 'I', 'J' constraints for
> 0, signed 11-bit, and signed 5-bit respectively. Add GUEST_BASE support
> similar to ppc64, with the value stored in a register. Add missing
Doing so actually doesn't work in a lot of cases. See below for more
explanation.
> registers to reg_alloc_order. Add support for 12-bit branch relocations.
> Add functions for synthetic operations: addi, mtctl, dep, shd, vshd, ori,
> andi, shifts, rotates, multiply, branches, setcond. Split out TLB reads
> from qemu_ld and qemu_st; fix argument loading for tlb external calls.
> Generate the prologue.
I have applied the patch. I have some comments, though; it would be nice
if you could address them with additional patches.
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> configure | 5 +-
> tcg/hppa/tcg-target.c | 1758
> ++++++++++++++++++++++++++++++++++---------------
> tcg/hppa/tcg-target.h | 142 +----
> 3 files changed, 1258 insertions(+), 647 deletions(-)
>
> diff --git a/configure b/configure
> index 1d5fb17..966cd7d 100755
> --- a/configure
> +++ b/configure
> @@ -722,6 +722,9 @@ case "$cpu" in
> ia64*)
> host_guest_base="yes"
> ;;
> + hppa*)
> + host_guest_base="yes"
> + ;;
> esac
>
> [ -z "$guest_base" ] && guest_base="$host_guest_base"
> @@ -2744,7 +2747,7 @@ if test "$target_linux_user" = "yes" -o
> "$target_bsd_user" = "yes" ; then
> # -static is used to avoid g1/g3 usage by the dynamic linker
> ldflags="$linker_script -static $ldflags"
> ;;
> - i386|x86_64|ppc|ppc64|s390|sparc64|alpha|arm|m68k|mips|mips64|ia64)
> + *)
> ldflags="$linker_script $ldflags"
> ;;
> esac
> diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
> index f9ae898..4e15256 100644
> --- a/tcg/hppa/tcg-target.c
> +++ b/tcg/hppa/tcg-target.c
> @@ -24,41 +24,26 @@
>
> #ifndef NDEBUG
> static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
> - "%r0",
> - "%r1",
> - "%rp",
> - "%r3",
> - "%r4",
> - "%r5",
> - "%r6",
> - "%r7",
> - "%r8",
> - "%r9",
> - "%r10",
> - "%r11",
> - "%r12",
> - "%r13",
> - "%r14",
> - "%r15",
> - "%r16",
> - "%r17",
> - "%r18",
> - "%r19",
> - "%r20",
> - "%r21",
> - "%r22",
> - "%r23",
> - "%r24",
> - "%r25",
> - "%r26",
> - "%dp",
> - "%ret0",
> - "%ret1",
> - "%sp",
> - "%r31",
> + "%r0", "%r1", "%rp", "%r3", "%r4", "%r5", "%r6", "%r7",
> + "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
> + "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
> + "%r24", "%r25", "%r26", "%dp", "%ret0", "%ret1", "%sp", "%r31",
> };
> #endif
>
> +/* This is an 8 byte temp slot in the stack frame. */
> +#define STACK_TEMP_OFS -16
> +
> +#ifndef GUEST_BASE
> +#define GUEST_BASE 0
> +#endif
> +
> +#ifdef CONFIG_USE_GUEST_BASE
> +#define TCG_GUEST_BASE_REG TCG_REG_R16
> +#else
> +#define TCG_GUEST_BASE_REG TCG_REG_R0
> +#endif
> +
> static const int tcg_target_reg_alloc_order[] = {
> TCG_REG_R4,
> TCG_REG_R5,
> @@ -75,6 +60,14 @@ static const int tcg_target_reg_alloc_order[] = {
> TCG_REG_R14,
> TCG_REG_R15,
> TCG_REG_R16,
> +
> + TCG_REG_R26,
> + TCG_REG_R25,
> + TCG_REG_R24,
> + TCG_REG_R23,
> +
> + TCG_REG_RET0,
> + TCG_REG_RET1,
> };
>
> static const int tcg_target_call_iarg_regs[4] = {
> @@ -89,16 +82,98 @@ static const int tcg_target_call_oarg_regs[2] = {
> TCG_REG_RET1,
> };
>
> +/* True iff val fits a signed field of width BITS. */
> +static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
> +{
> + return (val << ((sizeof(tcg_target_long) * 8 - bits))
> + >> (sizeof(tcg_target_long) * 8 - bits)) == val;
> +}
> +
> +/* True iff depi can be used to compute (reg | MASK).
> + Accept a bit pattern like:
> + 0....01....1
> + 1....10....0
> + 0..01..10..0
> + Copied from gcc sources. */
> +static inline int or_mask_p(tcg_target_ulong mask)
> +{
> + mask += mask & -mask;
> + return (mask & (mask - 1)) == 0;
> +}
> +
> +/* True iff depi or extru can be used to compute (reg & mask).
> + Accept a bit pattern like these:
> + 0....01....1
> + 1....10....0
> + 1..10..01..1
> + Copied from gcc sources. */
> +static inline int and_mask_p(tcg_target_ulong mask)
> +{
> + return or_mask_p(~mask);
> +}
> +
> +static int low_sign_ext(int val, int len)
> +{
> + return (((val << 1) & ~(-1u << len)) | ((val >> (len - 1)) & 1));
> +}
> +
> +static int reassemble_12(int as12)
> +{
> + return (((as12 & 0x800) >> 11) |
> + ((as12 & 0x400) >> 8) |
> + ((as12 & 0x3ff) << 3));
> +}
> +
> +static int reassemble_17(int as17)
> +{
> + return (((as17 & 0x10000) >> 16) |
> + ((as17 & 0x0f800) << 5) |
> + ((as17 & 0x00400) >> 8) |
> + ((as17 & 0x003ff) << 3));
> +}
> +
> +static int reassemble_21(int as21)
> +{
> + return (((as21 & 0x100000) >> 20) |
> + ((as21 & 0x0ffe00) >> 8) |
> + ((as21 & 0x000180) << 7) |
> + ((as21 & 0x00007c) << 14) |
> + ((as21 & 0x000003) << 12));
> +}
> +
> +/* ??? Bizzarely, there is no PCREL12F relocation type. I guess all
> + such relocations are simply fully handled by the assembler. */
> +#define R_PARISC_PCREL12F R_PARISC_NONE
> +
> static void patch_reloc(uint8_t *code_ptr, int type,
> tcg_target_long value, tcg_target_long addend)
> {
> + uint32_t *insn_ptr = (uint32_t *)code_ptr;
> + uint32_t insn = *insn_ptr;
> + tcg_target_long pcrel;
> +
> + value += addend;
> + pcrel = (value - ((tcg_target_long)code_ptr + 8)) >> 2;
> +
> switch (type) {
> + case R_PARISC_PCREL12F:
> + assert(check_fit_tl(pcrel, 12));
> + /* ??? We assume all patches are forward. See tcg_out_brcond
> + re setting the NUL bit on the branch and eliding the nop. */
> + assert(pcrel >= 0);
> + insn &= ~0x1ffdu;
> + insn |= reassemble_12(pcrel);
> + break;
> case R_PARISC_PCREL17F:
> - hppa_patch17f((uint32_t *)code_ptr, value, addend);
> + assert(check_fit_tl(pcrel, 17));
> + insn &= ~0x1f1ffdu;
> + insn |= reassemble_17(pcrel);
> break;
> default:
> tcg_abort();
> }
> +
> + *insn_ptr = insn;
> }
>
> /* maximum number of register used for input function arguments */
> @@ -126,6 +201,15 @@ static int target_parse_constraint(TCGArgConstraint *ct,
> const char **pct_str)
> tcg_regset_reset_reg(ct->u.regs, TCG_REG_R24);
> tcg_regset_reset_reg(ct->u.regs, TCG_REG_R23);
> break;
> + case 'Z':
> + ct->ct |= TCG_CT_CONST_0;
> + break;
> + case 'I':
> + ct->ct |= TCG_CT_CONST_S11;
> + break;
> + case 'J':
> + ct->ct |= TCG_CT_CONST_S5;
> + break;
> default:
> return -1;
> }
> @@ -135,15 +219,19 @@ static int target_parse_constraint(TCGArgConstraint
> *ct, const char **pct_str)
> }
>
> /* test if a constant matches the constraint */
> -static inline int tcg_target_const_match(tcg_target_long val,
> - const TCGArgConstraint *arg_ct)
> +static int tcg_target_const_match(tcg_target_long val,
> + const TCGArgConstraint *arg_ct)
> {
> - int ct;
> -
> - ct = arg_ct->ct;
> -
> - /* TODO */
> -
> + int ct = arg_ct->ct;
> + if (ct & TCG_CT_CONST) {
> + return 1;
> + } else if (ct & TCG_CT_CONST_0) {
> + return val == 0;
> + } else if (ct & TCG_CT_CONST_S5) {
> + return check_fit_tl(val, 5);
> + } else if (ct & TCG_CT_CONST_S11) {
> + return check_fit_tl(val, 11);
> + }
> return 0;
> }
>
> @@ -163,191 +251,588 @@ static inline int
> tcg_target_const_match(tcg_target_long val,
> #define INSN_SHDEP_CP(x) ((31 - (x)) << 5)
> #define INSN_SHDEP_P(x) ((x) << 5)
> #define INSN_COND(x) ((x) << 13)
> +#define INSN_IM11(x) low_sign_ext(x, 11)
> +#define INSN_IM14(x) low_sign_ext(x, 14)
> +#define INSN_IM5(x) (low_sign_ext(x, 5) << 16)
> +
> +#define COND_NEVER 0
> +#define COND_EQ 1
> +#define COND_LT 2
> +#define COND_LE 3
> +#define COND_LTU 4
> +#define COND_LEU 5
> +#define COND_SV 6
> +#define COND_OD 7
> +#define COND_FALSE 8
> +
> +#define INSN_ADD (INSN_OP(0x02) | INSN_EXT6(0x18))
> +#define INSN_ADDC (INSN_OP(0x02) | INSN_EXT6(0x1c))
> +#define INSN_ADDI (INSN_OP(0x2d))
> +#define INSN_ADDIL (INSN_OP(0x0a))
> +#define INSN_ADDL (INSN_OP(0x02) | INSN_EXT6(0x28))
> +#define INSN_AND (INSN_OP(0x02) | INSN_EXT6(0x08))
> +#define INSN_ANDCM (INSN_OP(0x02) | INSN_EXT6(0x00))
> +#define INSN_COMCLR (INSN_OP(0x02) | INSN_EXT6(0x22))
> +#define INSN_COMICLR (INSN_OP(0x24))
> +#define INSN_DEP (INSN_OP(0x35) | INSN_EXT3SH(3))
> +#define INSN_DEPI (INSN_OP(0x35) | INSN_EXT3SH(7))
> +#define INSN_EXTRS (INSN_OP(0x34) | INSN_EXT3SH(7))
> +#define INSN_EXTRU (INSN_OP(0x34) | INSN_EXT3SH(6))
> +#define INSN_LDIL (INSN_OP(0x08))
> +#define INSN_LDO (INSN_OP(0x0d))
> +#define INSN_MTCTL (INSN_OP(0x00) | INSN_EXT8B(0xc2))
> +#define INSN_OR (INSN_OP(0x02) | INSN_EXT6(0x09))
> +#define INSN_SHD (INSN_OP(0x34) | INSN_EXT3SH(2))
> +#define INSN_SUB (INSN_OP(0x02) | INSN_EXT6(0x10))
> +#define INSN_SUBB (INSN_OP(0x02) | INSN_EXT6(0x14))
> +#define INSN_SUBI (INSN_OP(0x25))
> +#define INSN_VEXTRS (INSN_OP(0x34) | INSN_EXT3SH(5))
> +#define INSN_VEXTRU (INSN_OP(0x34) | INSN_EXT3SH(4))
> +#define INSN_VSHD (INSN_OP(0x34) | INSN_EXT3SH(0))
> +#define INSN_XOR (INSN_OP(0x02) | INSN_EXT6(0x0a))
> +#define INSN_ZDEP (INSN_OP(0x35) | INSN_EXT3SH(2))
> +#define INSN_ZVDEP (INSN_OP(0x35) | INSN_EXT3SH(0))
> +
> +#define INSN_BL (INSN_OP(0x3a) | INSN_EXT3BR(0))
> +#define INSN_BL_N (INSN_OP(0x3a) | INSN_EXT3BR(0) | 2)
> +#define INSN_BLR (INSN_OP(0x3a) | INSN_EXT3BR(2))
> +#define INSN_BV (INSN_OP(0x3a) | INSN_EXT3BR(6))
> +#define INSN_BV_N (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2)
> +#define INSN_BLE_SR4 (INSN_OP(0x39) | (1 << 13))
> +
> +#define INSN_LDB (INSN_OP(0x10))
> +#define INSN_LDH (INSN_OP(0x11))
> +#define INSN_LDW (INSN_OP(0x12))
> +#define INSN_LDWM (INSN_OP(0x13))
> +#define INSN_FLDDS (INSN_OP(0x0b) | INSN_EXT4(0) | (1 << 12))
> +
> +#define INSN_LDBX (INSN_OP(0x03) | INSN_EXT4(0))
> +#define INSN_LDHX (INSN_OP(0x03) | INSN_EXT4(1))
> +#define INSN_LDWX (INSN_OP(0x03) | INSN_EXT4(2))
> +
> +#define INSN_STB (INSN_OP(0x18))
> +#define INSN_STH (INSN_OP(0x19))
> +#define INSN_STW (INSN_OP(0x1a))
> +#define INSN_STWM (INSN_OP(0x1b))
> +#define INSN_FSTDS (INSN_OP(0x0b) | INSN_EXT4(8) | (1 << 12))
> +
> +#define INSN_COMBT (INSN_OP(0x20))
> +#define INSN_COMBF (INSN_OP(0x22))
> +#define INSN_COMIBT (INSN_OP(0x21))
> +#define INSN_COMIBF (INSN_OP(0x23))
> +
> +/* supplied by libgcc */
> +extern void *__canonicalize_funcptr_for_compare(void *);
> +
> +static void tcg_out_mov(TCGContext *s, int ret, int arg)
> +{
> + /* PA1.1 defines COPY as OR r,0,t; PA2.0 defines COPY as LDO 0(r),t
> + but hppa-dis.c is unaware of this definition */
> + if (ret != arg) {
> + tcg_out32(s, INSN_OR | INSN_T(ret) | INSN_R1(arg)
> + | INSN_R2(TCG_REG_R0));
> + }
> +}
>
> -#define COND_NEVER 0
> -#define COND_EQUAL 1
> -#define COND_LT 2
> -#define COND_LTEQ 3
> -#define COND_LTU 4
> -#define COND_LTUEQ 5
> -#define COND_SV 6
> -#define COND_OD 7
> +static void tcg_out_movi(TCGContext *s, TCGType type,
> + int ret, tcg_target_long arg)
> +{
> + if (check_fit_tl(arg, 14)) {
> + tcg_out32(s, INSN_LDO | INSN_R1(ret)
> + | INSN_R2(TCG_REG_R0) | INSN_IM14(arg));
> + } else {
> + uint32_t hi, lo;
> + hi = arg >> 11;
> + lo = arg & 0x7ff;
> +
> + tcg_out32(s, INSN_LDIL | INSN_R2(ret) | reassemble_21(hi));
> + if (lo) {
> + tcg_out32(s, INSN_LDO | INSN_R1(ret)
> + | INSN_R2(ret) | INSN_IM14(lo));
> + }
> + }
> +}
>
> +static void tcg_out_ldst(TCGContext *s, int ret, int addr,
> + tcg_target_long offset, int op)
> +{
> + if (!check_fit_tl(offset, 14)) {
> + uint32_t hi, lo, op;
>
> -/* Logical ADD */
> -#define ARITH_ADD (INSN_OP(0x02) | INSN_EXT6(0x28))
> -#define ARITH_AND (INSN_OP(0x02) | INSN_EXT6(0x08))
> -#define ARITH_OR (INSN_OP(0x02) | INSN_EXT6(0x09))
> -#define ARITH_XOR (INSN_OP(0x02) | INSN_EXT6(0x0a))
> -#define ARITH_SUB (INSN_OP(0x02) | INSN_EXT6(0x10))
> + hi = offset >> 11;
> + lo = offset & 0x7ff;
>
> -#define SHD (INSN_OP(0x34) | INSN_EXT3SH(2))
> -#define VSHD (INSN_OP(0x34) | INSN_EXT3SH(0))
> -#define DEP (INSN_OP(0x35) | INSN_EXT3SH(3))
> -#define ZDEP (INSN_OP(0x35) | INSN_EXT3SH(2))
> -#define ZVDEP (INSN_OP(0x35) | INSN_EXT3SH(0))
> -#define EXTRU (INSN_OP(0x34) | INSN_EXT3SH(6))
> -#define EXTRS (INSN_OP(0x34) | INSN_EXT3SH(7))
> -#define VEXTRS (INSN_OP(0x34) | INSN_EXT3SH(5))
> + if (addr == TCG_REG_R0) {
> + op = INSN_LDIL | INSN_R2(TCG_REG_R1);
> + } else {
> + op = INSN_ADDIL | INSN_R2(addr);
> + }
> + tcg_out32(s, op | reassemble_21(hi));
>
> -#define SUBI (INSN_OP(0x25))
> -#define MTCTL (INSN_OP(0x00) | INSN_EXT8B(0xc2))
> + addr = TCG_REG_R1;
> + offset = lo;
> + }
>
> -#define BL (INSN_OP(0x3a) | INSN_EXT3BR(0))
> -#define BLE_SR4 (INSN_OP(0x39) | (1 << 13))
> -#define BV (INSN_OP(0x3a) | INSN_EXT3BR(6))
> -#define BV_N (INSN_OP(0x3a) | INSN_EXT3BR(6) | 2)
> -#define LDIL (INSN_OP(0x08))
> -#define LDO (INSN_OP(0x0d))
> + if (ret != addr || offset != 0 || op != INSN_LDO) {
> + tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) | INSN_IM14(offset));
> + }
> +}
>
> -#define LDB (INSN_OP(0x10))
> -#define LDH (INSN_OP(0x11))
> -#define LDW (INSN_OP(0x12))
> -#define LDWM (INSN_OP(0x13))
> +/* This function is required by tcg.c. */
> +static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
> + int arg1, tcg_target_long arg2)
> +{
> + tcg_out_ldst(s, ret, arg1, arg2, INSN_LDW);
> +}
> +
> +/* This function is required by tcg.c. */
> +static inline void tcg_out_st(TCGContext *s, TCGType type, int ret,
> + int arg1, tcg_target_long arg2)
> +{
> + tcg_out_ldst(s, ret, arg1, arg2, INSN_STW);
> +}
> +
> +static void tcg_out_ldst_index(TCGContext *s, int data,
> + int base, int index, int op)
> +{
> + tcg_out32(s, op | INSN_T(data) | INSN_R1(index) | INSN_R2(base));
> +}
> +
> +static inline void tcg_out_addi2(TCGContext *s, int ret, int arg1,
> + tcg_target_long val)
> +{
> + tcg_out_ldst(s, ret, arg1, val, INSN_LDO);
> +}
>
> -#define STB (INSN_OP(0x18))
> -#define STH (INSN_OP(0x19))
> -#define STW (INSN_OP(0x1a))
> -#define STWM (INSN_OP(0x1b))
> +/* This function is required by tcg.c. */
> +static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
> +{
> + tcg_out_addi2(s, reg, reg, val);
> +}
>
> -#define COMBT (INSN_OP(0x20))
> -#define COMBF (INSN_OP(0x22))
> +static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int
> op)
> +{
> + tcg_out32(s, op | INSN_T(t) | INSN_R1(r1) | INSN_R2(r2));
> +}
>
> -static int lowsignext(uint32_t val, int start, int length)
> +static inline void tcg_out_arithi(TCGContext *s, int t, int r1,
> + tcg_target_long val, int op)
> {
> - return (((val << 1) & ~(~0 << length)) |
> - ((val >> (length - 1)) & 1)) << start;
> + assert(check_fit_tl(val, 11));
> + tcg_out32(s, op | INSN_R1(t) | INSN_R2(r1) | INSN_IM11(val));
> }
>
> -static inline void tcg_out_mov(TCGContext *s, int ret, int arg)
> +static inline void tcg_out_nop(TCGContext *s)
> {
> - /* PA1.1 defines COPY as OR r,0,t */
> - tcg_out32(s, ARITH_OR | INSN_T(ret) | INSN_R1(arg) |
> INSN_R2(TCG_REG_R0));
> + tcg_out_arith(s, TCG_REG_R0, TCG_REG_R0, TCG_REG_R0, INSN_OR);
> +}
>
> - /* PA2.0 defines COPY as LDO 0(r),t
> - * but hppa-dis.c is unaware of this definition */
> - /* tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(arg) | reassemble_14(0)); */
> +static inline void tcg_out_mtctl_sar(TCGContext *s, int arg)
> +{
> + tcg_out32(s, INSN_MTCTL | INSN_R2(11) | INSN_R1(arg));
> +}
> +
> +/* Extract LEN bits at position OFS from ARG and place in RET.
> + Note that here the bit ordering is reversed from the PA-RISC
> + standard, such that the right-most bit is 0. */
> +static inline void tcg_out_extr(TCGContext *s, int ret, int arg,
> + unsigned ofs, unsigned len, int sign)
> +{
> + assert(ofs < 32 && len <= 32 - ofs);
> + tcg_out32(s, (sign ? INSN_EXTRS : INSN_EXTRU)
> + | INSN_R1(ret) | INSN_R2(arg)
> + | INSN_SHDEP_P(31 - ofs) | INSN_DEP_LEN(len));
> }
>
> -static inline void tcg_out_movi(TCGContext *s, TCGType type,
> - int ret, tcg_target_long arg)
> +/* Likewise with OFS interpreted little-endian. */
> +static inline void tcg_out_dep(TCGContext *s, int ret, int arg,
> + unsigned ofs, unsigned len)
> {
> - if (arg == (arg & 0x1fff)) {
> - tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(TCG_REG_R0) |
> - reassemble_14(arg));
> + assert(ofs < 32 && len <= 32 - ofs);
> + tcg_out32(s, INSN_DEP | INSN_R2(ret) | INSN_R1(arg)
> + | INSN_SHDEP_CP(31 - ofs) | INSN_DEP_LEN(len));
> +}
> +
> +static inline void tcg_out_shd(TCGContext *s, int ret, int hi, int lo,
> + unsigned count)
> +{
> + assert(count < 32);
> + tcg_out32(s, INSN_SHD | INSN_R1(hi) | INSN_R2(lo) | INSN_T(ret)
> + | INSN_SHDEP_CP(count));
> +}
> +
> +static void tcg_out_vshd(TCGContext *s, int ret, int hi, int lo, int creg)
> +{
> + tcg_out_mtctl_sar(s, creg);
> + tcg_out32(s, INSN_VSHD | INSN_T(ret) | INSN_R1(hi) | INSN_R2(lo));
> +}
> +
> +static void tcg_out_ori(TCGContext *s, int ret, int arg, tcg_target_ulong m)
> +{
> + if (m == 0) {
> + tcg_out_mov(s, ret, arg);
> + } else if (m == -1) {
> + tcg_out_movi(s, TCG_TYPE_I32, ret, -1);
Those cases are already eliminated in tcg/tcg-op.h. This code looks
redundant.
> + } else if (or_mask_p(m)) {
> + int bs0, bs1;
> +
> + for (bs0 = 0; bs0 < 32; bs0++) {
> + if ((m & (1u << bs0)) != 0) {
> + break;
> + }
> + }
> + for (bs1 = bs0; bs1 < 32; bs1++) {
> + if ((m & (1u << bs1)) == 0) {
> + break;
> + }
> + }
> + assert(bs1 == 32 || (1ul << bs1) > m);
> +
> + tcg_out_mov(s, ret, arg);
> + tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(-1)
> + | INSN_SHDEP_CP(31 - bs0) | INSN_DEP_LEN(bs1 - bs0));
> + } else {
> + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R1, m);
> + tcg_out_arith(s, ret, arg, TCG_REG_R1, INSN_OR);
Do we really want a movi here? It would be better to let the TCG code
load the constant itself, so that if the same constant is used twice, it
is only loaded once.
> + }
> +}
> +
> +static void tcg_out_andi(TCGContext *s, int ret, int arg, tcg_target_ulong m)
> +{
> + if (m == 0) {
> + tcg_out_mov(s, ret, TCG_REG_R0);
> + } else if (m == -1) {
> + tcg_out_mov(s, ret, arg);
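Same here: the m == 0 and m == -1 cases are already eliminated in
tcg/tcg-op.h, so this code looks redundant.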
> + } else if (and_mask_p(m)) {
> + int ls0, ls1, ms0;
> +
> + for (ls0 = 0; ls0 < 32; ls0++) {
> + if ((m & (1u << ls0)) == 0) {
> + break;
> + }
> + }
> + for (ls1 = ls0; ls1 < 32; ls1++) {
> + if ((m & (1u << ls1)) != 0) {
> + break;
> + }
> + }
> + for (ms0 = ls1; ms0 < 32; ms0++) {
> + if ((m & (1u << ms0)) == 0) {
> + break;
> + }
> + }
> + assert (ms0 == 32);
> +
> + if (ls1 == 32) {
> + tcg_out_extr(s, ret, arg, 0, ls0, 0);
> + } else {
> + tcg_out_mov(s, ret, arg);
> + tcg_out32(s, INSN_DEPI | INSN_R2(ret) | INSN_IM5(0)
> + | INSN_SHDEP_CP(31 - ls0) | INSN_DEP_LEN(ls1 - ls0));
> + }
> } else {
> - tcg_out32(s, LDIL | INSN_R2(ret) |
> - reassemble_21(lrsel((uint32_t)arg, 0)));
> - if (arg & 0x7ff)
> - tcg_out32(s, LDO | INSN_R1(ret) | INSN_R2(ret) |
> - reassemble_14(rrsel((uint32_t)arg, 0)));
> + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R1, m);
> + tcg_out_arith(s, ret, arg, TCG_REG_R1, INSN_AND);
Same here: rather than emitting a movi, it would be better to let the TCG
code load the constant itself.
> }
> }
>
> -static inline void tcg_out_ld_raw(TCGContext *s, int ret,
> - tcg_target_long arg)
> +static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
> {
> - tcg_out32(s, LDIL | INSN_R2(ret) |
> - reassemble_21(lrsel((uint32_t)arg, 0)));
> - tcg_out32(s, LDW | INSN_R1(ret) | INSN_R2(ret) |
> - reassemble_14(rrsel((uint32_t)arg, 0)));
> + tcg_out_extr(s, ret, arg, 0, 8, 1);
> }
>
> -static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
> - tcg_target_long arg)
> +static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
> {
> - tcg_out_ld_raw(s, ret, arg);
> + tcg_out_extr(s, ret, arg, 0, 16, 1);
> }
>
> -static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset,
> - int op)
> +static void tcg_out_shli(TCGContext *s, int ret, int arg, int count)
> {
> - if (offset == (offset & 0xfff))
> - tcg_out32(s, op | INSN_R1(ret) | INSN_R2(addr) |
> - reassemble_14(offset));
> - else {
> - fprintf(stderr, "unimplemented %s with offset %d\n", __func__,
> offset);
> - tcg_abort();
> - }
> + count &= 31;
> + tcg_out32(s, INSN_ZDEP | INSN_R2(ret) | INSN_R1(arg)
> + | INSN_SHDEP_CP(31 - count) | INSN_DEP_LEN(32 - count));
> }
>
> -static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
> - int arg1, tcg_target_long arg2)
> +static void tcg_out_shl(TCGContext *s, int ret, int arg, int creg)
> {
> - fprintf(stderr, "unimplemented %s\n", __func__);
> - tcg_abort();
> + tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
> + tcg_out_mtctl_sar(s, TCG_REG_R20);
> + tcg_out32(s, INSN_ZVDEP | INSN_R2(ret) | INSN_R1(arg) |
> INSN_DEP_LEN(32));
> }
>
> -static inline void tcg_out_st(TCGContext *s, TCGType type, int ret,
> - int arg1, tcg_target_long arg2)
> +static void tcg_out_shri(TCGContext *s, int ret, int arg, int count)
> {
> - fprintf(stderr, "unimplemented %s\n", __func__);
> - tcg_abort();
> + count &= 31;
> + tcg_out_extr(s, ret, arg, count, 32 - count, 0);
> }
>
> -static inline void tcg_out_arith(TCGContext *s, int t, int r1, int r2, int
> op)
> +static void tcg_out_shr(TCGContext *s, int ret, int arg, int creg)
> {
> - tcg_out32(s, op | INSN_T(t) | INSN_R1(r1) | INSN_R2(r2));
> + tcg_out_vshd(s, ret, TCG_REG_R0, arg, creg);
> }
>
> -static inline void tcg_out_arithi(TCGContext *s, int t, int r1,
> - tcg_target_long val, int op)
> +static void tcg_out_sari(TCGContext *s, int ret, int arg, int count)
> {
> - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R20, val);
> - tcg_out_arith(s, t, r1, TCG_REG_R20, op);
> + count &= 31;
> + tcg_out_extr(s, ret, arg, count, 32 - count, 1);
> }
>
> -static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
> +static void tcg_out_sar(TCGContext *s, int ret, int arg, int creg)
> {
> - tcg_out_arithi(s, reg, reg, val, ARITH_ADD);
> + tcg_out_arithi(s, TCG_REG_R20, creg, 31, INSN_SUBI);
> + tcg_out_mtctl_sar(s, TCG_REG_R20);
> + tcg_out32(s, INSN_VEXTRS | INSN_R1(ret) | INSN_R2(arg) |
> INSN_DEP_LEN(32));
> }
>
> -static inline void tcg_out_nop(TCGContext *s)
> +static void tcg_out_rotli(TCGContext *s, int ret, int arg, int count)
> {
> - tcg_out32(s, ARITH_OR | INSN_T(TCG_REG_R0) | INSN_R1(TCG_REG_R0) |
> - INSN_R2(TCG_REG_R0));
> + count &= 31;
> + tcg_out_shd(s, ret, arg, arg, 32 - count);
> }
>
> -static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg) {
> - tcg_out32(s, EXTRS | INSN_R1(ret) | INSN_R2(arg) |
> - INSN_SHDEP_P(31) | INSN_DEP_LEN(8));
> +static void tcg_out_rotl(TCGContext *s, int ret, int arg, int creg)
> +{
> + tcg_out_arithi(s, TCG_REG_R20, creg, 32, INSN_SUBI);
> + tcg_out_vshd(s, ret, arg, arg, TCG_REG_R20);
> }
>
> -static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg) {
> - tcg_out32(s, EXTRS | INSN_R1(ret) | INSN_R2(arg) |
> - INSN_SHDEP_P(31) | INSN_DEP_LEN(16));
> +static void tcg_out_rotri(TCGContext *s, int ret, int arg, int count)
> +{
> + count &= 31;
> + tcg_out_shd(s, ret, arg, arg, count);
> }
>
> -static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg) {
> - if(ret != arg)
> - tcg_out_mov(s, ret, arg);
> - tcg_out32(s, DEP | INSN_R2(ret) | INSN_R1(ret) |
> - INSN_SHDEP_CP(15) | INSN_DEP_LEN(8));
> - tcg_out32(s, SHD | INSN_T(ret) | INSN_R1(TCG_REG_R0) |
> - INSN_R2(ret) | INSN_SHDEP_CP(8));
> +static void tcg_out_rotr(TCGContext *s, int ret, int arg, int creg)
> +{
> + tcg_out_vshd(s, ret, arg, arg, creg);
> }
>
> -static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg, int
> temp) {
> - tcg_out32(s, SHD | INSN_T(temp) | INSN_R1(arg) |
> - INSN_R2(arg) | INSN_SHDEP_CP(16));
> - tcg_out32(s, DEP | INSN_R2(temp) | INSN_R1(temp) |
> - INSN_SHDEP_CP(15) | INSN_DEP_LEN(8));
> - tcg_out32(s, SHD | INSN_T(ret) | INSN_R1(arg) |
> - INSN_R2(temp) | INSN_SHDEP_CP(8));
> +static void tcg_out_bswap16(TCGContext *s, int ret, int arg, int sign)
> +{
> + if (ret != arg) {
> + tcg_out_mov(s, ret, arg); /* arg = xxAB */
> + }
> + tcg_out_dep(s, ret, ret, 16, 8); /* ret = xBAB */
> + tcg_out_extr(s, ret, ret, 8, 16, sign); /* ret = ..BA */
> }
>
> -static inline void tcg_out_call(TCGContext *s, void *func)
> +static void tcg_out_bswap32(TCGContext *s, int ret, int arg, int temp)
> {
> - uint32_t val = (uint32_t)__canonicalize_funcptr_for_compare(func);
> - tcg_out32(s, LDIL | INSN_R2(TCG_REG_R20) |
> - reassemble_21(lrsel(val, 0)));
> - tcg_out32(s, BLE_SR4 | INSN_R2(TCG_REG_R20) |
> - reassemble_17(rrsel(val, 0) >> 2));
> - tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31);
> + /* arg = ABCD */
> + tcg_out_rotri(s, temp, arg, 16); /* temp = CDAB */
> + tcg_out_dep(s, temp, temp, 16, 8); /* temp = CBAB */
> + tcg_out_shd(s, ret, arg, temp, 8); /* ret = DCBA */
> }
>
> -#if defined(CONFIG_SOFTMMU)
> +static void tcg_out_call(TCGContext *s, void *func)
> +{
> + tcg_target_long val, hi, lo, disp;
> +
> + val = (uint32_t)__canonicalize_funcptr_for_compare(func);
> + disp = (val - ((tcg_target_long)s->code_ptr + 8)) >> 2;
> +
> + if (check_fit_tl(disp, 17)) {
> + tcg_out32(s, INSN_BL_N | INSN_R2(TCG_REG_RP) | reassemble_17(disp));
> + } else {
> + hi = val >> 11;
> + lo = val & 0x7ff;
> +
> + tcg_out32(s, INSN_LDIL | INSN_R2(TCG_REG_R20) | reassemble_21(hi));
> + tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R20)
> + | reassemble_17(lo >> 2));
> + tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31);
> + }
> +}
>
> +static void tcg_out_xmpyu(TCGContext *s, int retl, int reth,
> + int arg1, int arg2)
> +{
> + /* Store both words into the stack for copy to the FPU. */
> + tcg_out_ldst(s, arg1, TCG_REG_SP, STACK_TEMP_OFS, INSN_STW);
> + tcg_out_ldst(s, arg2, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_STW);
> +
> + /* Load both words into the FPU at the same time. We get away
> + with this because we can address the left and right half of the
> + FPU registers individually once loaded. */
> + /* fldds stack_temp(sp),fr22 */
> + tcg_out32(s, INSN_FLDDS | INSN_R2(TCG_REG_SP)
> + | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
> +
> + /* xmpyu fr22r,fr22,fr22 */
> + tcg_out32(s, 0x3ad64796);
> +
> + /* Store the 64-bit result back into the stack. */
> + /* fstds stack_temp(sp),fr22 */
> + tcg_out32(s, INSN_FSTDS | INSN_R2(TCG_REG_SP)
> + | INSN_IM5(STACK_TEMP_OFS) | INSN_T(22));
> +
> + /* Load the pieces of the result that the caller requested. */
> + if (reth) {
> + tcg_out_ldst(s, reth, TCG_REG_SP, STACK_TEMP_OFS, INSN_LDW);
> + }
> + if (retl) {
> + tcg_out_ldst(s, retl, TCG_REG_SP, STACK_TEMP_OFS + 4, INSN_LDW);
> + }
> +}
> +
> +static void tcg_out_branch(TCGContext *s, int label_index, int nul)
> +{
> + TCGLabel *l = &s->labels[label_index];
> + uint32_t op = nul ? INSN_BL_N : INSN_BL;
> +
> + if (l->has_value) {
> + tcg_target_long val = l->u.value;
> +
> + val -= (tcg_target_long)s->code_ptr + 8;
> + val >>= 2;
> + assert(check_fit_tl(val, 17));
> +
> + tcg_out32(s, op | reassemble_17(val));
> + } else {
> + tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL17F, label_index, 0);
> + tcg_out32(s, op);
This breaks partial retranslation. The bits corresponding to the offset
should be preserved.
> + }
> +}
> +
> +static const uint8_t tcg_cond_to_cmp_cond[10] =
> +{
> + [TCG_COND_EQ] = COND_EQ,
> + [TCG_COND_NE] = COND_EQ | COND_FALSE,
> + [TCG_COND_LT] = COND_LT,
> + [TCG_COND_GE] = COND_LT | COND_FALSE,
> + [TCG_COND_LE] = COND_LE,
> + [TCG_COND_GT] = COND_LE | COND_FALSE,
> + [TCG_COND_LTU] = COND_LTU,
> + [TCG_COND_GEU] = COND_LTU | COND_FALSE,
> + [TCG_COND_LEU] = COND_LEU,
> + [TCG_COND_GTU] = COND_LEU | COND_FALSE,
> +};
> +
> +static void tcg_out_brcond(TCGContext *s, int cond, TCGArg c1,
> + TCGArg c2, int c2const, int label_index)
> +{
> + TCGLabel *l = &s->labels[label_index];
> + int op, pacond;
> +
> + /* Note that COMIB operates as if the immediate is the first
> + operand. We model brcond with the immediate in the second
> + to better match what targets are likely to give us. For
> + consistency, model COMB with reversed operands as well. */
> + pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
> +
> + if (c2const) {
> + op = (pacond & COND_FALSE ? INSN_COMIBF : INSN_COMIBT);
> + op |= INSN_IM5(c2);
> + } else {
> + op = (pacond & COND_FALSE ? INSN_COMBF : INSN_COMBT);
> + op |= INSN_R1(c2);
> + }
> + op |= INSN_R2(c1);
> + op |= INSN_COND(pacond & 7);
> +
> + if (l->has_value) {
> + tcg_target_long val = l->u.value;
> +
> + val -= (tcg_target_long)s->code_ptr + 8;
> + val >>= 2;
> + assert(check_fit_tl(val, 12));
> +
> + /* ??? Assume that all branches to defined labels are backward.
> + Which means that if the nul bit is set, the delay slot is
> + executed if the branch is taken, and not executed in fallthru. */
> + tcg_out32(s, op | reassemble_12(val));
> + tcg_out_nop(s);
> + } else {
> + tcg_out_reloc(s, s->code_ptr, R_PARISC_PCREL12F, label_index, 0);
> + /* ??? Assume that all branches to undefined labels are forward.
> + Which means that if the nul bit is set, the delay slot is
> + not executed if the branch is taken, which is what we want. */
> + tcg_out32(s, op | 2);
Same problem with partial retranslation here.
> + }
> +}
> +
> +static void tcg_out_comclr(TCGContext *s, int cond, TCGArg ret,
> + TCGArg c1, TCGArg c2, int c2const)
> +{
> + int op, pacond;
> +
> + /* Note that COMICLR operates as if the immediate is the first
> + operand. We model setcond with the immediate in the second
> + to better match what targets are likely to give us. For
> + consistency, model COMCLR with reversed operands as well. */
> + pacond = tcg_cond_to_cmp_cond[tcg_swap_cond(cond)];
> +
> + if (c2const) {
> + op = INSN_COMICLR | INSN_R2(c1) | INSN_R1(ret) | INSN_IM11(c2);
> + } else {
> + op = INSN_COMCLR | INSN_R2(c1) | INSN_R1(c2) | INSN_T(ret);
> + }
> + op |= INSN_COND(pacond & 7);
> + op |= pacond & COND_FALSE ? 1 << 12 : 0;
> +
> + tcg_out32(s, op);
> +}
> +
> +static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
> + TCGArg bl, int blconst, TCGArg bh, int bhconst,
> + int label_index)
> +{
> + switch (cond) {
> + case TCG_COND_EQ:
> + case TCG_COND_NE:
> + tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl,
> blconst);
> + tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
> + break;
> +
> + default:
> + tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
> + tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
> + tcg_out_brcond(s, tcg_unsigned_cond(cond),
> + al, bl, blconst, label_index);
> + break;
> + }
> +}
> +
> +static void tcg_out_setcond(TCGContext *s, int cond, TCGArg ret,
> + TCGArg c1, TCGArg c2, int c2const)
> +{
> + tcg_out_comclr(s, tcg_invert_cond(cond), ret, c1, c2, c2const);
> + tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
> +}
> +
> +static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
> + TCGArg al, TCGArg ah, TCGArg bl, int blconst,
> + TCGArg bh, int bhconst)
> +{
> + int scratch = TCG_REG_R20;
> +
> + if (ret != al && ret != ah
> + && (blconst || ret != bl)
> + && (bhconst || ret != bh)) {
> + scratch = ret;
> + }
> +
> + switch (cond) {
> + case TCG_COND_EQ:
> + case TCG_COND_NE:
> + tcg_out_setcond(s, cond, scratch, al, bl, blconst);
> + tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
> + tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
> + break;
> +
> + default:
> + tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl,
> blconst);
> + tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
> + tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
> + tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
> + tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
> + break;
> + }
> +
> + tcg_out_mov(s, ret, scratch);
> +}
> +
> +#if defined(CONFIG_SOFTMMU)
> #include "../../softmmu_defs.h"
>
> static void *qemu_ld_helpers[4] = {
> @@ -363,30 +848,77 @@ static void *qemu_st_helpers[4] = {
> __stl_mmu,
> __stq_mmu,
> };
> +
> +/* Load and compare a TLB entry, and branch if TLB miss. OFFSET is set to
> + the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
> + TLB for the memory index. The return value is the offset from ENV
> + contained in R1 afterward (to be used when loading ADDEND); if the
> + return value is 0, R1 is not used. */
> +
> +static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
> + int addrhi, int s_bits, int lab_miss, int offset)
> +{
> + int ret;
> +
> + /* Extracting the index into the TLB. The "normal C operation" is
> + r1 = addr_reg >> TARGET_PAGE_BITS;
> + r1 &= CPU_TLB_SIZE - 1;
> + r1 <<= CPU_TLB_ENTRY_BITS;
> + What this does is extract CPU_TLB_BITS beginning at TARGET_PAGE_BITS
> + and place them at CPU_TLB_ENTRY_BITS. We can combine the first two
> + operations with an EXTRU. Unfortunately, the current value of
> + CPU_TLB_ENTRY_BITS is > 3, so we can't merge that shift with the
> + add that follows. */
> + tcg_out_extr(s, r1, addrlo, TARGET_PAGE_BITS, CPU_TLB_BITS, 0);
> + tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
> + tcg_out_shli(s, r1, r1, CPU_TLB_ENTRY_BITS);
> + tcg_out_arith(s, r1, r1, TCG_AREG0, INSN_ADDL);
> +
> + /* Make sure that both the addr_{read,write} and addend can be
> + read with a 14-bit offset from the same base register. */
> + if (check_fit_tl(offset + CPU_TLB_SIZE, 14)) {
> + ret = 0;
> + } else {
> + ret = (offset + 0x400) & ~0x7ff;
> + offset = ret - offset;
> + tcg_out_addi2(s, TCG_REG_R1, r1, ret);
> + r1 = TCG_REG_R1;
> + }
> +
> + /* Load the entry from the computed slot. */
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R23, r1, offset);
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset + 4);
> + } else {
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
> + }
> +
> + /* If not equal, jump to lab_miss. */
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_brcond2(s, TCG_COND_NE, TCG_REG_R20, TCG_REG_R23,
> + r0, 0, addrhi, 0, lab_miss);
> + } else {
> + tcg_out_brcond(s, TCG_COND_NE, TCG_REG_R20, r0, 0, lab_miss);
> + }
> +
> + return ret;
> +}
> #endif
>
> static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
> {
> - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
> + int addr_reg, addr_reg2;
> + int data_reg, data_reg2;
> + int r0, r1, mem_index, s_bits, bswap;
> + tcg_target_long offset;
> #if defined(CONFIG_SOFTMMU)
> - uint32_t *label1_ptr, *label2_ptr;
> -#endif
> -#if TARGET_LONG_BITS == 64
> -#if defined(CONFIG_SOFTMMU)
> - uint32_t *label3_ptr;
> -#endif
> - int addr_reg2;
> + int lab1, lab2, argreg;
> #endif
>
> data_reg = *args++;
> - if (opc == 3)
> - data_reg2 = *args++;
> - else
> - data_reg2 = 0; /* suppress warning */
> + data_reg2 = (opc == 3 ? *args++ : TCG_REG_R0);
I am not sure TCG_REG_R0 is really correct here, and I find it confusing.
While its value is zero, the assignment there is just to make GCC
happy; it won't be used afterwards.
> addr_reg = *args++;
> -#if TARGET_LONG_BITS == 64
> - addr_reg2 = *args++;
> -#endif
> + addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
Same here.
> mem_index = *args;
> s_bits = opc & 3;
>
> @@ -394,96 +926,22 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg
> *args, int opc)
> r1 = TCG_REG_R25;
>
> #if defined(CONFIG_SOFTMMU)
> - tcg_out_mov(s, r1, addr_reg);
> + lab1 = gen_new_label();
> + lab2 = gen_new_label();
Do you really want to use labels here? Loads/stores are the most common
instructions, and I am not really sure of the resulting performance.
> - tcg_out_mov(s, r0, addr_reg);
> + offset = tcg_out_tlb_read(s, r0, r1, addr_reg, addr_reg2, s_bits, lab1,
> + offsetof(CPUState,
> + tlb_table[mem_index][0].addr_read));
>
> - tcg_out32(s, SHD | INSN_T(r1) | INSN_R1(TCG_REG_R0) | INSN_R2(r1) |
> - INSN_SHDEP_CP(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
> + /* TLB Hit. */
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : r1),
> + offsetof(CPUState, tlb_table[mem_index][0].addend) - offset);
>
> - tcg_out_arithi(s, r0, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
> - ARITH_AND);
> -
> - tcg_out_arithi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS,
> - ARITH_AND);
> -
> - tcg_out_arith(s, r1, r1, TCG_AREG0, ARITH_ADD);
> - tcg_out_arithi(s, r1, r1,
> - offsetof(CPUState, tlb_table[mem_index][0].addr_read),
> - ARITH_ADD);
> -
> - tcg_out_ldst(s, TCG_REG_R20, r1, 0, LDW);
> -
> -#if TARGET_LONG_BITS == 32
> - /* if equal, jump to label1 */
> - label1_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
> - INSN_COND(COND_EQUAL));
> - tcg_out_mov(s, r0, addr_reg); /* delay slot */
> -#else
> - /* if not equal, jump to label3 */
> - label3_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, COMBF | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
> - INSN_COND(COND_EQUAL));
> - tcg_out_mov(s, r0, addr_reg); /* delay slot */
> -
> - tcg_out_ldst(s, TCG_REG_R20, r1, 4, LDW);
> -
> - /* if equal, jump to label1 */
> - label1_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(addr_reg2) |
> - INSN_COND(COND_EQUAL));
> - tcg_out_nop(s); /* delay slot */
> -
> - /* label3: */
> - *label3_ptr |= reassemble_12((uint32_t *)s->code_ptr - label3_ptr - 2);
> -#endif
> -
> -#if TARGET_LONG_BITS == 32
> - tcg_out_mov(s, TCG_REG_R26, addr_reg);
> - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R25, mem_index);
> -#else
> - tcg_out_mov(s, TCG_REG_R26, addr_reg);
> - tcg_out_mov(s, TCG_REG_R25, addr_reg2);
> - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R24, mem_index);
> -#endif
> -
> - tcg_out_call(s, qemu_ld_helpers[s_bits]);
> -
> - switch(opc) {
> - case 0 | 4:
> - tcg_out_ext8s(s, data_reg, TCG_REG_RET0);
> - break;
> - case 1 | 4:
> - tcg_out_ext16s(s, data_reg, TCG_REG_RET0);
> - break;
> - case 0:
> - case 1:
> - case 2:
> - default:
> - tcg_out_mov(s, data_reg, TCG_REG_RET0);
> - break;
> - case 3:
> - tcg_abort();
> - tcg_out_mov(s, data_reg, TCG_REG_RET0);
> - tcg_out_mov(s, data_reg2, TCG_REG_RET1);
> - break;
> - }
> -
> - /* jump to label2 */
> - label2_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, BL | INSN_R2(TCG_REG_R0) | 2);
> -
> - /* label1: */
> - *label1_ptr |= reassemble_12((uint32_t *)s->code_ptr - label1_ptr - 2);
> -
> - tcg_out_arithi(s, TCG_REG_R20, r1,
> - offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry,
> addr_read),
> - ARITH_ADD);
> - tcg_out_ldst(s, TCG_REG_R20, TCG_REG_R20, 0, LDW);
> - tcg_out_arith(s, r0, r0, TCG_REG_R20, ARITH_ADD);
> + tcg_out_arith(s, r0, addr_reg, TCG_REG_R20, INSN_ADDL);
> + offset = TCG_REG_R0;
> #else
> r0 = addr_reg;
> + offset = GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_R0;
> #endif
>
> #ifdef TARGET_WORDS_BIGENDIAN
> @@ -492,190 +950,151 @@ static void tcg_out_qemu_ld(TCGContext *s, const
> TCGArg *args, int opc)
> bswap = 1;
> #endif
> switch (opc) {
> - case 0:
> - tcg_out_ldst(s, data_reg, r0, 0, LDB);
> - break;
> - case 0 | 4:
> - tcg_out_ldst(s, data_reg, r0, 0, LDB);
> - tcg_out_ext8s(s, data_reg, data_reg);
> - break;
> - case 1:
> - tcg_out_ldst(s, data_reg, r0, 0, LDH);
> - if (bswap)
> - tcg_out_bswap16(s, data_reg, data_reg);
> - break;
> - case 1 | 4:
> - tcg_out_ldst(s, data_reg, r0, 0, LDH);
> - if (bswap)
> - tcg_out_bswap16(s, data_reg, data_reg);
> + case 0:
> + tcg_out_ldst_index(s, data_reg, r0, offset, INSN_LDBX);
> + break;
> + case 0 | 4:
> + tcg_out_ldst_index(s, data_reg, r0, offset, INSN_LDBX);
> + tcg_out_ext8s(s, data_reg, data_reg);
> + break;
> + case 1:
> + tcg_out_ldst_index(s, data_reg, r0, offset, INSN_LDHX);
> + if (bswap) {
> + tcg_out_bswap16(s, data_reg, data_reg, 0);
> + }
> + break;
> + case 1 | 4:
> + tcg_out_ldst_index(s, data_reg, r0, offset, INSN_LDHX);
> + if (bswap) {
> + tcg_out_bswap16(s, data_reg, data_reg, 1);
> + } else {
> tcg_out_ext16s(s, data_reg, data_reg);
> - break;
> - case 2:
> - tcg_out_ldst(s, data_reg, r0, 0, LDW);
> - if (bswap)
> - tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20);
> - break;
> - case 3:
> - tcg_abort();
> - if (!bswap) {
> - tcg_out_ldst(s, data_reg, r0, 0, LDW);
> - tcg_out_ldst(s, data_reg2, r0, 4, LDW);
> + }
> + break;
> + case 2:
> + tcg_out_ldst_index(s, data_reg, r0, offset, INSN_LDWX);
> + if (bswap) {
> + tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20);
> + }
> + break;
> + case 3:
> + if (bswap) {
> + int t = data_reg2;
> + data_reg2 = data_reg;
> + data_reg = t;
> + }
> + if (offset == TCG_REG_R0) {
> + /* Make sure not to clobber the base register. */
> + if (data_reg2 == r0) {
> + tcg_out_ldst(s, data_reg, r0, 4, INSN_LDW);
> + tcg_out_ldst(s, data_reg2, r0, 0, INSN_LDW);
> } else {
> - tcg_out_ldst(s, data_reg, r0, 4, LDW);
> - tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20);
> - tcg_out_ldst(s, data_reg2, r0, 0, LDW);
> - tcg_out_bswap32(s, data_reg2, data_reg2, TCG_REG_R20);
> + tcg_out_ldst(s, data_reg2, r0, 0, INSN_LDW);
> + tcg_out_ldst(s, data_reg, r0, 4, INSN_LDW);
> }
> - break;
> - default:
> - tcg_abort();
> + } else {
> + tcg_out_addi2(s, TCG_REG_R20, r0, 4);
> + tcg_out_ldst_index(s, data_reg2, r0, offset, INSN_LDWX);
> + tcg_out_ldst_index(s, data_reg, TCG_REG_R20, offset, INSN_LDWX);
> + }
> + if (bswap) {
> + tcg_out_bswap32(s, data_reg, data_reg, TCG_REG_R20);
> + tcg_out_bswap32(s, data_reg2, data_reg2, TCG_REG_R20);
> + }
> + break;
> + default:
> + tcg_abort();
> }
>
> #if defined(CONFIG_SOFTMMU)
> + tcg_out_branch(s, lab2, 1);
> +
> + /* TLB Miss. */
> + /* label1: */
> + tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
> +
> + argreg = TCG_REG_R26;
> + tcg_out_mov(s, argreg--, addr_reg);
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_mov(s, argreg--, addr_reg2);
> + }
> + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
> +
> + tcg_out_call(s, qemu_ld_helpers[s_bits]);
> +
> + switch (opc) {
> + case 0:
> + tcg_out_andi(s, data_reg, TCG_REG_RET0, 0xff);
> + break;
> + case 0 | 4:
> + tcg_out_ext8s(s, data_reg, TCG_REG_RET0);
> + break;
> + case 1:
> + tcg_out_andi(s, data_reg, TCG_REG_RET0, 0xffff);
> + break;
> + case 1 | 4:
> + tcg_out_ext16s(s, data_reg, TCG_REG_RET0);
> + break;
> + case 2:
> + case 2 | 4:
> + tcg_out_mov(s, data_reg, TCG_REG_RET0);
> + break;
> + case 3:
> + tcg_out_mov(s, data_reg, TCG_REG_RET0);
> + tcg_out_mov(s, data_reg2, TCG_REG_RET1);
> + break;
> + default:
> + tcg_abort();
> + }
> +
> /* label2: */
> - *label2_ptr |= reassemble_17((uint32_t *)s->code_ptr - label2_ptr - 2);
> + tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
> #endif
> }
>
> static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
> {
> - int addr_reg, data_reg, data_reg2, r0, r1, mem_index, s_bits, bswap;
> -#if defined(CONFIG_SOFTMMU)
> - uint32_t *label1_ptr, *label2_ptr;
> -#endif
> -#if TARGET_LONG_BITS == 64
> + int addr_reg, addr_reg2;
> + int data_reg, data_reg2;
> + int r0, r1, mem_index, s_bits, bswap;
> #if defined(CONFIG_SOFTMMU)
> - uint32_t *label3_ptr;
> -#endif
> - int addr_reg2;
> + tcg_target_long offset;
> + int lab1, lab2, argreg;
> #endif
>
> data_reg = *args++;
> - if (opc == 3)
> - data_reg2 = *args++;
> - else
> - data_reg2 = 0; /* suppress warning */
> + data_reg2 = (opc == 3 ? *args++ : 0);
> addr_reg = *args++;
> -#if TARGET_LONG_BITS == 64
> - addr_reg2 = *args++;
> -#endif
> + addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
Here it makes more sense ;-)
> mem_index = *args;
> -
> s_bits = opc;
>
> r0 = TCG_REG_R26;
> r1 = TCG_REG_R25;
>
> #if defined(CONFIG_SOFTMMU)
> - tcg_out_mov(s, r1, addr_reg);
> -
> - tcg_out_mov(s, r0, addr_reg);
> -
> - tcg_out32(s, SHD | INSN_T(r1) | INSN_R1(TCG_REG_R0) | INSN_R2(r1) |
> - INSN_SHDEP_CP(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
> -
> - tcg_out_arithi(s, r0, r0, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
> - ARITH_AND);
> -
> - tcg_out_arithi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS,
> - ARITH_AND);
> + lab1 = gen_new_label();
> + lab2 = gen_new_label();
Same here.
> - tcg_out_arith(s, r1, r1, TCG_AREG0, ARITH_ADD);
> - tcg_out_arithi(s, r1, r1,
> - offsetof(CPUState, tlb_table[mem_index][0].addr_write),
> - ARITH_ADD);
> + offset = tcg_out_tlb_read(s, r0, r1, addr_reg, addr_reg2, s_bits, lab1,
> + offsetof(CPUState,
> + tlb_table[mem_index][0].addr_write));
>
> - tcg_out_ldst(s, TCG_REG_R20, r1, 0, LDW);
> + /* TLB Hit. */
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : r1),
> + offsetof(CPUState, tlb_table[mem_index][0].addend) - offset);
>
> -#if TARGET_LONG_BITS == 32
> - /* if equal, jump to label1 */
> - label1_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
> - INSN_COND(COND_EQUAL));
> - tcg_out_mov(s, r0, addr_reg); /* delay slot */
> + tcg_out_arith(s, r0, addr_reg, TCG_REG_R20, INSN_ADDL);
> #else
> - /* if not equal, jump to label3 */
> - label3_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, COMBF | INSN_R1(TCG_REG_R20) | INSN_R2(r0) |
> - INSN_COND(COND_EQUAL));
> - tcg_out_mov(s, r0, addr_reg); /* delay slot */
> -
> - tcg_out_ldst(s, TCG_REG_R20, r1, 4, LDW);
> -
> - /* if equal, jump to label1 */
> - label1_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, COMBT | INSN_R1(TCG_REG_R20) | INSN_R2(addr_reg2) |
> - INSN_COND(COND_EQUAL));
> - tcg_out_nop(s); /* delay slot */
> -
> - /* label3: */
> - *label3_ptr |= reassemble_12((uint32_t *)s->code_ptr - label3_ptr - 2);
> -#endif
> -
> - tcg_out_mov(s, TCG_REG_R26, addr_reg);
> -#if TARGET_LONG_BITS == 64
> - tcg_out_mov(s, TCG_REG_R25, addr_reg2);
> - if (opc == 3) {
> - tcg_abort();
> - tcg_out_mov(s, TCG_REG_R24, data_reg);
> - tcg_out_mov(s, TCG_REG_R23, data_reg2);
> - /* TODO: push mem_index */
> - tcg_abort();
> + /* There are no indexed stores, so if GUEST_BASE is set
> + we must do the add explicitly. Careful to avoid R20,
> + which is used for the bswaps to follow. */
> + if (GUEST_BASE == 0) {
> + r0 = addr_reg;
> } else {
> - switch(opc) {
> - case 0:
> - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R24) | INSN_R2(data_reg) |
> - INSN_SHDEP_P(31) | INSN_DEP_LEN(8));
> - break;
> - case 1:
> - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R24) | INSN_R2(data_reg) |
> - INSN_SHDEP_P(31) | INSN_DEP_LEN(16));
> - break;
> - case 2:
> - tcg_out_mov(s, TCG_REG_R24, data_reg);
> - break;
> - }
> - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R23, mem_index);
> + tcg_out_arith(s, TCG_REG_R31, addr_reg, TCG_GUEST_BASE_REG,
> INSN_ADDL);
> + r0 = TCG_REG_R31;
> }
> -#else
> - if (opc == 3) {
> - tcg_abort();
> - tcg_out_mov(s, TCG_REG_R25, data_reg);
> - tcg_out_mov(s, TCG_REG_R24, data_reg2);
> - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R23, mem_index);
> - } else {
> - switch(opc) {
> - case 0:
> - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R25) | INSN_R2(data_reg) |
> - INSN_SHDEP_P(31) | INSN_DEP_LEN(8));
> - break;
> - case 1:
> - tcg_out32(s, EXTRU | INSN_R1(TCG_REG_R25) | INSN_R2(data_reg) |
> - INSN_SHDEP_P(31) | INSN_DEP_LEN(16));
> - break;
> - case 2:
> - tcg_out_mov(s, TCG_REG_R25, data_reg);
> - break;
> - }
> - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R24, mem_index);
> - }
> -#endif
> - tcg_out_call(s, qemu_st_helpers[s_bits]);
> -
> - /* jump to label2 */
> - label2_ptr = (uint32_t *)s->code_ptr;
> - tcg_out32(s, BL | INSN_R2(TCG_REG_R0) | 2);
> -
> - /* label1: */
> - *label1_ptr |= reassemble_12((uint32_t *)s->code_ptr - label1_ptr - 2);
> -
> - tcg_out_arithi(s, TCG_REG_R20, r1,
> - offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry,
> addr_write),
> - ARITH_ADD);
> - tcg_out_ldst(s, TCG_REG_R20, TCG_REG_R20, 0, LDW);
> - tcg_out_arith(s, r0, r0, TCG_REG_R20, ARITH_ADD);
> -#else
> - r0 = addr_reg;
> #endif
>
> #ifdef TARGET_WORDS_BIGENDIAN
> @@ -685,170 +1104,345 @@ static void tcg_out_qemu_st(TCGContext *s, const
> TCGArg *args, int opc)
> #endif
> switch (opc) {
> case 0:
> - tcg_out_ldst(s, data_reg, r0, 0, STB);
> + tcg_out_ldst(s, data_reg, r0, 0, INSN_STB);
> break;
> case 1:
> if (bswap) {
> - tcg_out_bswap16(s, TCG_REG_R20, data_reg);
> + tcg_out_bswap16(s, TCG_REG_R20, data_reg, 0);
> data_reg = TCG_REG_R20;
> }
> - tcg_out_ldst(s, data_reg, r0, 0, STH);
> + tcg_out_ldst(s, data_reg, r0, 0, INSN_STH);
> break;
> case 2:
> if (bswap) {
> tcg_out_bswap32(s, TCG_REG_R20, data_reg, TCG_REG_R20);
> data_reg = TCG_REG_R20;
> }
> - tcg_out_ldst(s, data_reg, r0, 0, STW);
> + tcg_out_ldst(s, data_reg, r0, 0, INSN_STW);
> break;
> case 3:
> - tcg_abort();
> - if (!bswap) {
> - tcg_out_ldst(s, data_reg, r0, 0, STW);
> - tcg_out_ldst(s, data_reg2, r0, 4, STW);
> - } else {
> + if (bswap) {
> tcg_out_bswap32(s, TCG_REG_R20, data_reg, TCG_REG_R20);
> - tcg_out_ldst(s, TCG_REG_R20, r0, 4, STW);
> - tcg_out_bswap32(s, TCG_REG_R20, data_reg2, TCG_REG_R20);
> - tcg_out_ldst(s, TCG_REG_R20, r0, 0, STW);
> + tcg_out_bswap32(s, TCG_REG_R23, data_reg2, TCG_REG_R23);
> + data_reg2 = TCG_REG_R20;
> + data_reg = TCG_REG_R23;
> }
> + tcg_out_ldst(s, data_reg2, r0, 0, INSN_STW);
> + tcg_out_ldst(s, data_reg, r0, 4, INSN_STW);
> break;
> default:
> tcg_abort();
> }
>
> #if defined(CONFIG_SOFTMMU)
> + tcg_out_branch(s, lab2, 1);
> +
> + /* TLB Miss. */
> + /* label1: */
> + tcg_out_label(s, lab1, (tcg_target_long)s->code_ptr);
> +
> + argreg = TCG_REG_R26;
> + tcg_out_mov(s, argreg--, addr_reg);
> + if (TARGET_LONG_BITS == 64) {
> + tcg_out_mov(s, argreg--, addr_reg2);
> + }
> +
> + switch(opc) {
> + case 0:
> + tcg_out_andi(s, argreg--, data_reg, 0xff);
> + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
> + break;
> + case 1:
> + tcg_out_andi(s, argreg--, data_reg, 0xffff);
> + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
> + break;
> + case 2:
> + tcg_out_mov(s, argreg--, data_reg);
> + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
> + break;
> + case 3:
> + /* Because of the alignment required by the 64-bit data argument,
> + we will always use R23/R24. Also, we will always run out of
> + argument registers for storing mem_index, so that will have
> + to go on the stack. */
> + if (mem_index == 0) {
> + argreg = TCG_REG_R0;
> + } else {
> + argreg = TCG_REG_R20;
> + tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
> + }
> + tcg_out_mov(s, TCG_REG_R23, data_reg2);
> + tcg_out_mov(s, TCG_REG_R24, data_reg);
> + tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_SP,
> + TCG_TARGET_CALL_STACK_OFFSET - 4);
> + break;
> + default:
> + tcg_abort();
> + }
> +
> + tcg_out_call(s, qemu_st_helpers[s_bits]);
> +
> /* label2: */
> - *label2_ptr |= reassemble_17((uint32_t *)s->code_ptr - label2_ptr - 2);
> + tcg_out_label(s, lab2, (tcg_target_long)s->code_ptr);
> #endif
> }
>
> +static void tcg_out_exit_tb(TCGContext *s, TCGArg arg)
> +{
> + if (!check_fit_tl(arg, 14)) {
> + uint32_t hi, lo;
> + hi = arg & ~0x7ff;
> + lo = arg & 0x7ff;
> + if (lo) {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, hi);
> + tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
> + tcg_out_addi(s, TCG_REG_RET0, lo);
> + return;
> + }
> + arg = hi;
> + }
> + tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_R18));
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, arg);
> +}
> +
> +static void tcg_out_goto_tb(TCGContext *s, TCGArg arg)
> +{
> + if (s->tb_jmp_offset) {
> + /* direct jump method */
> + fprintf(stderr, "goto_tb direct\n");
> + tcg_abort();
> + } else {
> + /* indirect jump method */
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, TCG_REG_R0,
> + (tcg_target_long)(s->tb_next + arg));
> + tcg_out32(s, INSN_BV_N | INSN_R2(TCG_REG_R20));
> + }
> + s->tb_next_offset[arg] = s->code_ptr - s->code_buf;
> +}
> +
> static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg
> *args,
> const int *const_args)
> {
> - int c;
> -
> switch (opc) {
> case INDEX_op_exit_tb:
> - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RET0, args[0]);
> - tcg_out32(s, BV_N | INSN_R2(TCG_REG_R18));
> + tcg_out_exit_tb(s, args[0]);
> break;
> case INDEX_op_goto_tb:
> - if (s->tb_jmp_offset) {
> - /* direct jump method */
> - fprintf(stderr, "goto_tb direct\n");
> - tcg_abort();
> - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R20, args[0]);
> - tcg_out32(s, BV_N | INSN_R2(TCG_REG_R20));
> - s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> - } else {
> - /* indirect jump method */
> - tcg_out_ld_ptr(s, TCG_REG_R20,
> - (tcg_target_long)(s->tb_next + args[0]));
> - tcg_out32(s, BV_N | INSN_R2(TCG_REG_R20));
> - }
> - s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
> + tcg_out_goto_tb(s, args[0]);
> break;
> +
> case INDEX_op_call:
> - tcg_out32(s, BLE_SR4 | INSN_R2(args[0]));
> - tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31);
> + if (const_args[0]) {
> + tcg_out_call(s, (void *)args[0]);
> + } else {
> + tcg_out32(s, INSN_BLE_SR4 | INSN_R2(args[0]));
> + tcg_out_mov(s, TCG_REG_RP, TCG_REG_R31);
> + }
> break;
> +
> case INDEX_op_jmp:
> fprintf(stderr, "unimplemented jmp\n");
> tcg_abort();
> break;
> +
> case INDEX_op_br:
> - fprintf(stderr, "unimplemented br\n");
> - tcg_abort();
> + tcg_out_branch(s, args[0], 1);
> break;
> +
> case INDEX_op_movi_i32:
> tcg_out_movi(s, TCG_TYPE_I32, args[0], (uint32_t)args[1]);
> break;
>
> case INDEX_op_ld8u_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], LDB);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB);
> break;
> case INDEX_op_ld8s_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], LDB);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDB);
> tcg_out_ext8s(s, args[0], args[0]);
> break;
> case INDEX_op_ld16u_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], LDH);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH);
> break;
> case INDEX_op_ld16s_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], LDH);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDH);
> tcg_out_ext16s(s, args[0], args[0]);
> break;
> case INDEX_op_ld_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], LDW);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_LDW);
> break;
>
> case INDEX_op_st8_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], STB);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_STB);
> break;
> case INDEX_op_st16_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], STH);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_STH);
> break;
> case INDEX_op_st_i32:
> - tcg_out_ldst(s, args[0], args[1], args[2], STW);
> + tcg_out_ldst(s, args[0], args[1], args[2], INSN_STW);
> + break;
> +
> + case INDEX_op_add_i32:
> + if (const_args[2]) {
> + tcg_out_addi2(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_arith(s, args[0], args[1], args[2], INSN_ADDL);
> + }
> break;
>
> case INDEX_op_sub_i32:
> - c = ARITH_SUB;
> - goto gen_arith;
> + if (const_args[1]) {
> + if (const_args[2]) {
> + tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1] - args[2]);
> + } else {
> + /* Recall that SUBI is a reversed subtract. */
> + tcg_out_arithi(s, args[0], args[2], args[1], INSN_SUBI);
> + }
> + } else if (const_args[2]) {
> + tcg_out_addi2(s, args[0], args[1], -args[2]);
> + } else {
> + tcg_out_arith(s, args[0], args[1], args[2], INSN_SUB);
> + }
> + break;
> +
> case INDEX_op_and_i32:
> - c = ARITH_AND;
> - goto gen_arith;
> + if (const_args[2]) {
> + tcg_out_andi(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_arith(s, args[0], args[1], args[2], INSN_AND);
> + }
> + break;
> +
> case INDEX_op_or_i32:
> - c = ARITH_OR;
> - goto gen_arith;
> + if (const_args[2]) {
> + tcg_out_ori(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_arith(s, args[0], args[1], args[2], INSN_OR);
> + }
> + break;
> +
> case INDEX_op_xor_i32:
> - c = ARITH_XOR;
> - goto gen_arith;
> - case INDEX_op_add_i32:
> - c = ARITH_ADD;
> - goto gen_arith;
> + tcg_out_arith(s, args[0], args[1], args[2], INSN_XOR);
> + break;
> +
> + case INDEX_op_andc_i32:
> + if (const_args[2]) {
> + tcg_out_andi(s, args[0], args[1], ~args[2]);
> + } else {
> + tcg_out_arith(s, args[0], args[1], args[2], INSN_ANDCM);
> + }
> + break;
>
> case INDEX_op_shl_i32:
> - tcg_out32(s, SUBI | INSN_R1(TCG_REG_R20) | INSN_R2(args[2]) |
> - lowsignext(0x1f, 0, 11));
> - tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(TCG_REG_R20));
> - tcg_out32(s, ZVDEP | INSN_R2(args[0]) | INSN_R1(args[1]) |
> - INSN_DEP_LEN(32));
> + if (const_args[2]) {
> + tcg_out_shli(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_shl(s, args[0], args[1], args[2]);
> + }
> break;
> +
> case INDEX_op_shr_i32:
> - tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(args[2]));
> - tcg_out32(s, VSHD | INSN_T(args[0]) | INSN_R1(TCG_REG_R0) |
> - INSN_R2(args[1]));
> + if (const_args[2]) {
> + tcg_out_shri(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_shr(s, args[0], args[1], args[2]);
> + }
> break;
> +
> case INDEX_op_sar_i32:
> - tcg_out32(s, SUBI | INSN_R1(TCG_REG_R20) | INSN_R2(args[2]) |
> - lowsignext(0x1f, 0, 11));
> - tcg_out32(s, MTCTL | INSN_R2(11) | INSN_R1(TCG_REG_R20));
> - tcg_out32(s, VEXTRS | INSN_R1(args[0]) | INSN_R2(args[1]) |
> - INSN_DEP_LEN(32));
> + if (const_args[2]) {
> + tcg_out_sari(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_sar(s, args[0], args[1], args[2]);
> + }
> + break;
> +
> + case INDEX_op_rotl_i32:
> + if (const_args[2]) {
> + tcg_out_rotli(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_rotl(s, args[0], args[1], args[2]);
> + }
> + break;
> +
> + case INDEX_op_rotr_i32:
> + if (const_args[2]) {
> + tcg_out_rotri(s, args[0], args[1], args[2]);
> + } else {
> + tcg_out_rotr(s, args[0], args[1], args[2]);
> + }
> break;
>
> case INDEX_op_mul_i32:
> - fprintf(stderr, "unimplemented mul\n");
> - tcg_abort();
> + tcg_out_xmpyu(s, args[0], TCG_REG_R0, args[1], args[2]);
> break;
> case INDEX_op_mulu2_i32:
> - fprintf(stderr, "unimplemented mulu2\n");
> - tcg_abort();
> + tcg_out_xmpyu(s, args[0], args[1], args[2], args[3]);
> break;
> - case INDEX_op_div2_i32:
> - fprintf(stderr, "unimplemented div2\n");
> - tcg_abort();
> +
> + case INDEX_op_bswap16_i32:
> + tcg_out_bswap16(s, args[0], args[1], 0);
> break;
> - case INDEX_op_divu2_i32:
> - fprintf(stderr, "unimplemented divu2\n");
> - tcg_abort();
> + case INDEX_op_bswap32_i32:
> + tcg_out_bswap32(s, args[0], args[1], TCG_REG_R20);
> + break;
> +
> + case INDEX_op_not_i32:
> + tcg_out_arithi(s, args[0], args[1], -1, INSN_SUBI);
> + break;
> + case INDEX_op_ext8s_i32:
> + tcg_out_ext8s(s, args[0], args[1]);
> + break;
> + case INDEX_op_ext16s_i32:
> + tcg_out_ext16s(s, args[0], args[1]);
> + break;
> +
> + /* These three correspond exactly to the fallback implementation.
> + But by including them we reduce the number of TCG ops that
> + need to be generated, and these opcodes are fairly common. */
Are you sure it really makes a difference?
> + case INDEX_op_neg_i32:
> + tcg_out_arith(s, args[0], TCG_REG_R0, args[1], INSN_SUB);
> + break;
> + case INDEX_op_ext8u_i32:
> + tcg_out_andi(s, args[0], args[1], 0xff);
> + break;
> + case INDEX_op_ext16u_i32:
> + tcg_out_andi(s, args[0], args[1], 0xffff);
> break;
>
> case INDEX_op_brcond_i32:
> - fprintf(stderr, "unimplemented brcond\n");
> - tcg_abort();
> + tcg_out_brcond(s, args[2], args[0], args[1], const_args[1], args[3]);
> + break;
> + case INDEX_op_brcond2_i32:
> + tcg_out_brcond2(s, args[4], args[0], args[1],
> + args[2], const_args[2],
> + args[3], const_args[3], args[5]);
> + break;
> +
> + case INDEX_op_setcond_i32:
> + tcg_out_setcond(s, args[3], args[0], args[1], args[2],
> const_args[2]);
> + break;
> + case INDEX_op_setcond2_i32:
> + tcg_out_setcond2(s, args[5], args[0], args[1], args[2],
> + args[3], const_args[3], args[4], const_args[4]);
> + break;
> +
> + case INDEX_op_add2_i32:
> + if (const_args[4]) {
> + tcg_out_arithi(s, args[0], args[2], args[4], INSN_ADDI);
> + } else {
> + tcg_out_arith(s, args[0], args[2], args[4], INSN_ADD);
> + }
> + tcg_out_arith(s, args[1], args[3], args[5], INSN_ADDC);
> + break;
> +
> + case INDEX_op_sub2_i32:
> + if (const_args[2]) {
> + /* Recall that SUBI is a reversed subtract. */
> + tcg_out_arithi(s, args[0], args[4], args[2], INSN_SUBI);
> + } else {
> + tcg_out_arith(s, args[0], args[2], args[4], INSN_SUB);
> + }
> + tcg_out_arith(s, args[1], args[3], args[5], INSN_SUBB);
> break;
>
> case INDEX_op_qemu_ld8u:
> @@ -866,6 +1460,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
> opc, const TCGArg *args,
> case INDEX_op_qemu_ld32:
> tcg_out_qemu_ld(s, args, 2);
> break;
> + case INDEX_op_qemu_ld64:
> + tcg_out_qemu_ld(s, args, 3);
> + break;
>
> case INDEX_op_qemu_st8:
> tcg_out_qemu_st(s, args, 0);
> @@ -876,47 +1473,70 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode
> opc, const TCGArg *args,
> case INDEX_op_qemu_st32:
> tcg_out_qemu_st(s, args, 2);
> break;
> + case INDEX_op_qemu_st64:
> + tcg_out_qemu_st(s, args, 3);
> + break;
>
> default:
> fprintf(stderr, "unknown opcode 0x%x\n", opc);
> tcg_abort();
> }
> - return;
> -
> -gen_arith:
> - tcg_out_arith(s, args[0], args[1], args[2], c);
> }
>
> static const TCGTargetOpDef hppa_op_defs[] = {
> { INDEX_op_exit_tb, { } },
> { INDEX_op_goto_tb, { } },
>
> - { INDEX_op_call, { "r" } },
> + { INDEX_op_call, { "ri" } },
> { INDEX_op_jmp, { "r" } },
> { INDEX_op_br, { } },
>
> { INDEX_op_mov_i32, { "r", "r" } },
> { INDEX_op_movi_i32, { "r" } },
> +
> { INDEX_op_ld8u_i32, { "r", "r" } },
> { INDEX_op_ld8s_i32, { "r", "r" } },
> { INDEX_op_ld16u_i32, { "r", "r" } },
> { INDEX_op_ld16s_i32, { "r", "r" } },
> { INDEX_op_ld_i32, { "r", "r" } },
> - { INDEX_op_st8_i32, { "r", "r" } },
> - { INDEX_op_st16_i32, { "r", "r" } },
> - { INDEX_op_st_i32, { "r", "r" } },
> + { INDEX_op_st8_i32, { "rZ", "r" } },
> + { INDEX_op_st16_i32, { "rZ", "r" } },
> + { INDEX_op_st_i32, { "rZ", "r" } },
> +
> + { INDEX_op_add_i32, { "r", "rZ", "ri" } },
> + { INDEX_op_sub_i32, { "r", "rI", "ri" } },
> + { INDEX_op_and_i32, { "r", "rZ", "ri" } },
> + { INDEX_op_or_i32, { "r", "rZ", "ri" } },
Already commented for "and" and "or", but the same applies to add and
sub. Do we really need an "i" constraint here if the constant is going
to be loaded with a movi?
> + { INDEX_op_xor_i32, { "r", "rZ", "rZ" } },
> + { INDEX_op_andc_i32, { "r", "rZ", "ri" } },
Same here.
> +
> + { INDEX_op_mul_i32, { "r", "r", "r" } },
> + { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
>
> - { INDEX_op_add_i32, { "r", "r", "r" } },
> - { INDEX_op_sub_i32, { "r", "r", "r" } },
> - { INDEX_op_and_i32, { "r", "r", "r" } },
> - { INDEX_op_or_i32, { "r", "r", "r" } },
> - { INDEX_op_xor_i32, { "r", "r", "r" } },
> + { INDEX_op_shl_i32, { "r", "r", "ri" } },
> + { INDEX_op_shr_i32, { "r", "r", "ri" } },
> + { INDEX_op_sar_i32, { "r", "r", "ri" } },
> + { INDEX_op_rotl_i32, { "r", "r", "ri" } },
> + { INDEX_op_rotr_i32, { "r", "r", "ri" } },
>
> - { INDEX_op_shl_i32, { "r", "r", "r" } },
> - { INDEX_op_shr_i32, { "r", "r", "r" } },
> - { INDEX_op_sar_i32, { "r", "r", "r" } },
> + { INDEX_op_bswap16_i32, { "r", "r" } },
> + { INDEX_op_bswap32_i32, { "r", "r" } },
> + { INDEX_op_neg_i32, { "r", "r" } },
> + { INDEX_op_not_i32, { "r", "r" } },
>
> - { INDEX_op_brcond_i32, { "r", "r" } },
> + { INDEX_op_ext8s_i32, { "r", "r" } },
> + { INDEX_op_ext8u_i32, { "r", "r" } },
> + { INDEX_op_ext16s_i32, { "r", "r" } },
> + { INDEX_op_ext16u_i32, { "r", "r" } },
> +
> + { INDEX_op_brcond_i32, { "rZ", "rJ" } },
> + { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
> +
> + { INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
> + { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
> +
> + { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
> + { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rZ", "rZ" } },
>
> #if TARGET_LONG_BITS == 32
> { INDEX_op_qemu_ld8u, { "r", "L" } },
> @@ -926,10 +1546,10 @@ static const TCGTargetOpDef hppa_op_defs[] = {
> { INDEX_op_qemu_ld32, { "r", "L" } },
> { INDEX_op_qemu_ld64, { "r", "r", "L" } },
>
> - { INDEX_op_qemu_st8, { "L", "L" } },
> - { INDEX_op_qemu_st16, { "L", "L" } },
> - { INDEX_op_qemu_st32, { "L", "L" } },
> - { INDEX_op_qemu_st64, { "L", "L", "L" } },
> + { INDEX_op_qemu_st8, { "LZ", "L" } },
> + { INDEX_op_qemu_st16, { "LZ", "L" } },
> + { INDEX_op_qemu_st32, { "LZ", "L" } },
> + { INDEX_op_qemu_st64, { "LZ", "LZ", "L" } },
> #else
> { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
> { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
> @@ -938,25 +1558,98 @@ static const TCGTargetOpDef hppa_op_defs[] = {
> { INDEX_op_qemu_ld32, { "r", "L", "L" } },
> { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
>
> - { INDEX_op_qemu_st8, { "L", "L", "L" } },
> - { INDEX_op_qemu_st16, { "L", "L", "L" } },
> - { INDEX_op_qemu_st32, { "L", "L", "L" } },
> - { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
> + { INDEX_op_qemu_st8, { "LZ", "L", "L" } },
> + { INDEX_op_qemu_st16, { "LZ", "L", "L" } },
> + { INDEX_op_qemu_st32, { "LZ", "L", "L" } },
> + { INDEX_op_qemu_st64, { "LZ", "LZ", "L", "L" } },
> #endif
> { -1 },
> };
>
> +static int tcg_target_callee_save_regs[] = {
> + /* R2, the return address register, is saved specially
> + in the caller's frame. */
> + /* R3, the frame pointer, is not currently modified. */
> + TCG_REG_R4,
> + TCG_REG_R5,
> + TCG_REG_R6,
> + TCG_REG_R7,
> + TCG_REG_R8,
> + TCG_REG_R9,
> + TCG_REG_R10,
> + TCG_REG_R11,
> + TCG_REG_R12,
> + TCG_REG_R13,
> + TCG_REG_R14,
> + TCG_REG_R15,
> + TCG_REG_R16,
> + /* R17 is the global env, so no need to save. */
> + TCG_REG_R18
> +};
> +
> +void tcg_target_qemu_prologue(TCGContext *s)
> +{
> + int frame_size, i;
> +
> + /* Allocate space for the fixed frame marker. */
> + frame_size = -TCG_TARGET_CALL_STACK_OFFSET;
> + frame_size += TCG_TARGET_STATIC_CALL_ARGS_SIZE;
> +
> + /* Allocate space for the saved registers. */
> + frame_size += ARRAY_SIZE(tcg_target_callee_save_regs) * 4;
> +
> + /* Align the allocated space. */
> + frame_size = ((frame_size + TCG_TARGET_STACK_ALIGN - 1)
> + & -TCG_TARGET_STACK_ALIGN);
> +
> + /* The return address is stored in the caller's frame. */
> + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -20);
> +
> + /* Allocate stack frame, saving the first register at the same time. */
> + tcg_out_ldst(s, tcg_target_callee_save_regs[0],
> + TCG_REG_SP, frame_size, INSN_STWM);
> +
> + /* Save all callee saved registers. */
> + for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
> + tcg_out_st(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i],
> + TCG_REG_SP, -frame_size + i * 4);
> + }
> +
> + if (GUEST_BASE != 0) {
> + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
> + }
The final GUEST_BASE value is computed after the prologue has been
generated. The value is modified in two cases:
- The user specifies a non-aligned base address.
- /proc/sys/vm/mmap_min_addr is different from 0, which has been the
default configuration for more than a year now.
When this happens, the guest crashes almost immediately.
> + /* Jump to TB, and adjust R18 to be the return address. */
> + tcg_out32(s, INSN_BLE_SR4 | INSN_R2(TCG_REG_R26));
> + tcg_out_mov(s, TCG_REG_R18, TCG_REG_R31);
> +
> + /* Restore callee saved registers. */
> + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_RP, TCG_REG_SP, -frame_size - 20);
> + for (i = 1; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
> + tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_callee_save_regs[i],
> + TCG_REG_SP, -frame_size + i * 4);
> + }
> +
> + /* Deallocate stack frame and return. */
> + tcg_out32(s, INSN_BV | INSN_R2(TCG_REG_RP));
> + tcg_out_ldst(s, tcg_target_callee_save_regs[0],
> + TCG_REG_SP, -frame_size, INSN_LDWM);
> +}
> +
> void tcg_target_init(TCGContext *s)
> {
> tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
> - tcg_regset_set32(tcg_target_call_clobber_regs, 0,
> - (1 << TCG_REG_R20) |
> - (1 << TCG_REG_R21) |
> - (1 << TCG_REG_R22) |
> - (1 << TCG_REG_R23) |
> - (1 << TCG_REG_R24) |
> - (1 << TCG_REG_R25) |
> - (1 << TCG_REG_R26));
> +
> + tcg_regset_clear(tcg_target_call_clobber_regs);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R20);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R21);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R22);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R23);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R24);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R25);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R26);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET0);
> + tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_RET1);
>
> tcg_regset_clear(s->reserved_regs);
> tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0); /* hardwired to zero
> */
> @@ -969,6 +1662,9 @@ void tcg_target_init(TCGContext *s)
> tcg_regset_set_reg(s->reserved_regs, TCG_REG_DP); /* data pointer */
> tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
> tcg_regset_set_reg(s->reserved_regs, TCG_REG_R31); /* ble link reg */
> + if (GUEST_BASE != 0) {
> + tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
> + }
>
> tcg_add_target_add_op_defs(hppa_op_defs);
> }
> diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
> index e956e71..36b6949 100644
> --- a/tcg/hppa/tcg-target.h
> +++ b/tcg/hppa/tcg-target.h
> @@ -69,17 +69,33 @@ enum {
> TCG_REG_R31,
> };
>
> +#define TCG_CT_CONST_0 0x0100
> +#define TCG_CT_CONST_S5 0x0200
> +#define TCG_CT_CONST_S11 0x0400
> +
> /* used for function call generation */
> #define TCG_REG_CALL_STACK TCG_REG_SP
> -#define TCG_TARGET_STACK_ALIGN 16
> +#define TCG_TARGET_STACK_ALIGN 64
> +#define TCG_TARGET_CALL_STACK_OFFSET -48
> +#define TCG_TARGET_STATIC_CALL_ARGS_SIZE 8*4
> +#define TCG_TARGET_CALL_ALIGN_ARGS 1
> #define TCG_TARGET_STACK_GROWSUP
>
> /* optional instructions */
> -#define TCG_TARGET_HAS_div2_i32
> -//#define TCG_TARGET_HAS_ext8s_i32
> -//#define TCG_TARGET_HAS_ext16s_i32
> -//#define TCG_TARGET_HAS_bswap16_i32
> -//#define TCG_TARGET_HAS_bswap32_i32
> +// #define TCG_TARGET_HAS_div_i32
> +#define TCG_TARGET_HAS_rot_i32
> +#define TCG_TARGET_HAS_ext8s_i32
> +#define TCG_TARGET_HAS_ext16s_i32
> +#define TCG_TARGET_HAS_ext8u_i32
> +#define TCG_TARGET_HAS_ext16u_i32
> +#define TCG_TARGET_HAS_bswap16_i32
> +#define TCG_TARGET_HAS_bswap32_i32
> +#define TCG_TARGET_HAS_not_i32
> +#define TCG_TARGET_HAS_neg_i32
> +#define TCG_TARGET_HAS_andc_i32
> +// #define TCG_TARGET_HAS_orc_i32
> +
> +#define TCG_TARGET_HAS_GUEST_BASE
>
> /* Note: must be synced with dyngen-exec.h */
> #define TCG_AREG0 TCG_REG_R17
> @@ -87,116 +103,12 @@ enum {
> static inline void flush_icache_range(unsigned long start, unsigned long
> stop)
> {
> start &= ~31;
> - while (start <= stop)
> - {
> - asm volatile ("fdc 0(%0)\n"
> - "sync\n"
> - "fic 0(%%sr4, %0)\n"
> - "sync\n"
> + while (start <= stop) {
> + asm volatile ("fdc 0(%0)\n\t"
> + "sync\n\t"
> + "fic 0(%%sr4, %0)\n\t"
> + "sync"
> : : "r"(start) : "memory");
> start += 32;
> }
> }
> -
> -/* supplied by libgcc */
> -extern void *__canonicalize_funcptr_for_compare(void *);
> -
> -/* Field selection types defined by hppa */
> -#define rnd(x) (((x)+0x1000)&~0x1fff)
> -/* lsel: select left 21 bits */
> -#define lsel(v,a) (((v)+(a))>>11)
> -/* rsel: select right 11 bits */
> -#define rsel(v,a) (((v)+(a))&0x7ff)
> -/* lrsel with rounding of addend to nearest 8k */
> -#define lrsel(v,a) (((v)+rnd(a))>>11)
> -/* rrsel with rounding of addend to nearest 8k */
> -#define rrsel(v,a) ((((v)+rnd(a))&0x7ff)+((a)-rnd(a)))
> -
> -#define mask(x,sz) ((x) & ~((1<<(sz))-1))
> -
> -static inline int reassemble_12(int as12)
> -{
> - return (((as12 & 0x800) >> 11) |
> - ((as12 & 0x400) >> 8) |
> - ((as12 & 0x3ff) << 3));
> -}
> -
> -static inline int reassemble_14(int as14)
> -{
> - return (((as14 & 0x1fff) << 1) |
> - ((as14 & 0x2000) >> 13));
> -}
> -
> -static inline int reassemble_17(int as17)
> -{
> - return (((as17 & 0x10000) >> 16) |
> - ((as17 & 0x0f800) << 5) |
> - ((as17 & 0x00400) >> 8) |
> - ((as17 & 0x003ff) << 3));
> -}
> -
> -static inline int reassemble_21(int as21)
> -{
> - return (((as21 & 0x100000) >> 20) |
> - ((as21 & 0x0ffe00) >> 8) |
> - ((as21 & 0x000180) << 7) |
> - ((as21 & 0x00007c) << 14) |
> - ((as21 & 0x000003) << 12));
> -}
> -
> -static inline void hppa_patch21l(uint32_t *insn, int val, int addend)
> -{
> - val = lrsel(val, addend);
> - *insn = mask(*insn, 21) | reassemble_21(val);
> -}
> -
> -static inline void hppa_patch14r(uint32_t *insn, int val, int addend)
> -{
> - val = rrsel(val, addend);
> - *insn = mask(*insn, 14) | reassemble_14(val);
> -}
> -
> -static inline void hppa_patch17r(uint32_t *insn, int val, int addend)
> -{
> - val = rrsel(val, addend);
> - *insn = (*insn & ~0x1f1ffd) | reassemble_17(val);
> -}
> -
> -
> -static inline void hppa_patch21l_dprel(uint32_t *insn, int val, int addend)
> -{
> - register unsigned int dp asm("r27");
> - hppa_patch21l(insn, val - dp, addend);
> -}
> -
> -static inline void hppa_patch14r_dprel(uint32_t *insn, int val, int addend)
> -{
> - register unsigned int dp asm("r27");
> - hppa_patch14r(insn, val - dp, addend);
> -}
> -
> -static inline void hppa_patch17f(uint32_t *insn, int val, int addend)
> -{
> - int dot = (int)insn & ~0x3;
> - int v = ((val + addend) - dot - 8) / 4;
> - if (v > (1 << 16) || v < -(1 << 16)) {
> - printf("cannot fit branch to offset %d [%08x->%08x]\n", v, dot, val);
> - abort();
> - }
> - *insn = (*insn & ~0x1f1ffd) | reassemble_17(v);
> -}
> -
> -static inline void hppa_load_imm21l(uint32_t *insn, int val, int addend)
> -{
> - /* Transform addil L'sym(%dp) to ldil L'val, %r1 */
> - *insn = 0x20200000 | reassemble_21(lrsel(val, 0));
> -}
> -
> -static inline void hppa_load_imm14r(uint32_t *insn, int val, int addend)
> -{
> - /* Transform ldw R'sym(%r1), %rN to ldo R'sym(%r1), %rN */
> - hppa_patch14r(insn, val, addend);
> - /* HACK */
> - if (addend == 0)
> - *insn = (*insn & ~0xfc000000) | (0x0d << 26);
> -}
> --
> 1.6.2.5
>
>
>
>
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net
- Re: [Qemu-devel] [PATCH 0/2] tcg-hppa finish, v3, (continued)
- Re: [Qemu-devel] [PATCH 0/2] tcg-hppa finish, v3, Aurelien Jarno, 2010/04/07
- [Qemu-devel] [PATCH 1/4] tcg-hppa: Compute is_write in cpu_signal_handler., Richard Henderson, 2010/04/07
- [Qemu-devel] [PATCH 4/4] tcg-hppa: Don't try to calls to non-constant addresses., Richard Henderson, 2010/04/07
- [Qemu-devel] [PATCH 0/4] tcg-hppa finish, v4, Richard Henderson, 2010/04/07
- [Qemu-devel] [PATCH 3/4] tcg-hppa: Fix in/out register overlap in add2/sub2., Richard Henderson, 2010/04/07
- [Qemu-devel] [PATCH 2/4] tcg-hppa: Finish the port., Richard Henderson, 2010/04/07