Diffstat (limited to 'tcg')
-rw-r--r--  tcg/README              |   46
-rw-r--r--  tcg/arm/tcg-target.c    |  215
-rw-r--r--  tcg/arm/tcg-target.h    |    4
-rw-r--r--  tcg/hppa/tcg-target.c   |  233
-rw-r--r--  tcg/hppa/tcg-target.h   |    4
-rw-r--r--  tcg/i386/tcg-target.c   |  764
-rw-r--r--  tcg/i386/tcg-target.h   |   14
-rw-r--r--  tcg/ia64/tcg-target.c   |  236
-rw-r--r--  tcg/ia64/tcg-target.h   |   13
-rw-r--r--  tcg/mips/tcg-target.c   |  389
-rw-r--r--  tcg/mips/tcg-target.h   |   28
-rw-r--r--  tcg/optimize.c          |  688
-rw-r--r--  tcg/ppc/tcg-target.c    |  597
-rw-r--r--  tcg/ppc/tcg-target.h    |    3
-rw-r--r--  tcg/ppc64/tcg-target.c  |   46
-rw-r--r--  tcg/ppc64/tcg-target.h  |    3
-rw-r--r--  tcg/s390/tcg-target.c   |   55
-rw-r--r--  tcg/s390/tcg-target.h   |    5
-rw-r--r--  tcg/sparc/tcg-target.c  | 1581
-rw-r--r--  tcg/sparc/tcg-target.h  |   39
-rw-r--r--  tcg/tcg-op.h            |  683
-rw-r--r--  tcg/tcg-opc.h           |   34
-rw-r--r--  tcg/tcg.c               |  643
-rw-r--r--  tcg/tcg.h               |  150
-rw-r--r--  tcg/tci/tcg-target.c    |   48
-rw-r--r--  tcg/tci/tcg-target.h    |    9
26 files changed, 3926 insertions, 2604 deletions
diff --git a/tcg/README b/tcg/README
index cfdfd96..ec1ac79 100644
--- a/tcg/README
+++ b/tcg/README
@@ -77,19 +77,27 @@ destroyed, but local temporaries and globals are preserved.
Using the tcg_gen_helper_x_y it is possible to call any function
taking i32, i64 or pointer types. By default, before calling a helper,
all globals are stored at their canonical location and it is assumed
-that the function can modify them. This can be overridden by the
-TCG_CALL_CONST function modifier. By default, the helper is allowed to
-modify the CPU state or raise an exception. This can be overridden by
-the TCG_CALL_PURE function modifier, in which case the call to the
-function is removed if the return value is not used.
+that the function can modify them. By default, the helper is allowed to
+modify the CPU state or raise an exception.
+
+This can be overridden using the following function modifiers:
+- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals,
+ either directly or via an exception. They will not be saved to their
+ canonical locations before calling the helper.
+- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals.
+ They will only be saved to their canonical locations before calling helpers,
+ but they won't be reloaded afterwards.
+- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if
+ the return value is not used.
+
+Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS.
On some TCG targets (e.g. x86), several calling conventions are
supported.
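
As an illustration of the new flags (a sketch only: the helper name below is
made up, while DEF_HELPER_FLAGS_2 and the flag names are the ones documented
above), a pure helper that merely computes a value from its arguments can be
declared so that TCG neither spills globals for it nor keeps the call when
its result is unused:

    /* Hypothetical helper; no globals read or written, no side effects. */
    DEF_HELPER_FLAGS_2(my_parity, TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
                       i32, i32, i32)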
* Branches:
-Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an
-explicit address. Conditional branches can only jump to labels.
+Use the instruction 'br' to jump to a label.
3.3) Code Optimizations
@@ -129,10 +137,6 @@ call function 'ptr' (pointer type)
********* Jumps/Labels
-* jmp t0
-
-Absolute jump to address t0 (pointer type).
-
* set_label $label
Define label 'label' at the current program point.
@@ -141,7 +145,7 @@ Define label 'label' at the current program point.
Jump to label.
-* brcond_i32/i64 cond, t0, t1, label
+* brcond_i32/i64 t0, t1, cond, label
Conditional jump if t0 cond t1 is true. cond can be:
TCG_COND_EQ
@@ -301,12 +305,18 @@ This operation would be equivalent to
********* Conditional moves
-* setcond_i32/i64 cond, dest, t1, t2
+* setcond_i32/i64 dest, t1, t2, cond
dest = (t1 cond t2)
Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
+* movcond_i32/i64 dest, c1, c2, v1, v2, cond
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
+
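
For reference, a minimal C model of these semantics (not TCG code; the
condition is shown as equality for concreteness):

    static uint32_t movcond_ref(uint32_t c1, uint32_t c2,
                                uint32_t v1, uint32_t v2)
    {
        /* dest = (c1 cond c2 ? v1 : v2), with cond = TCG_COND_EQ here */
        return (c1 == c2) ? v1 : v2;
    }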
********* Type conversions
* ext_i32_i64 t0, t1
@@ -348,13 +358,16 @@ st32_i64 t0, t1, offset
write(t0, t1 + offset)
Write 8, 16, 32 or 64 bits to host memory.
+All these opcodes assume that the addressed host memory doesn't correspond
+to a global. If it does, the behaviour is unpredictable.
+
********* 64-bit target on 32-bit host support
The following opcodes are internal to TCG. Thus they are to be implemented by
32-bit host code generators, but are not to be emitted by guest translators.
They are emitted as needed by inline functions within "tcg-op.h".
-* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
Similar to brcond, except that the 64-bit values T0 and T1
are formed from two 32-bit arguments.
@@ -371,7 +384,7 @@ is returned in two 32-bit outputs.
Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
the full 64-bit product T0. The latter is returned in two 32-bit outputs.
-* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
Similar to setcond, except that the 64-bit values T1 and T2 are
formed from two 32-bit arguments. The result is a 32-bit value.
@@ -386,7 +399,8 @@ Exit the current TB and return the value t0 (word type).
Exit the current TB and jump to the TB index 'index' (constant) if the
current TB was linked to this TB. Otherwise execute the next
-instructions.
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
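
A typical use of this pair of opcodes in a translator looks as follows (a
sketch; cpu_pc, dest_pc and tb are assumed to come from the surrounding
target translator and are not defined here):

    tcg_gen_goto_tb(0);                        /* slot 0, at most once per TB */
    tcg_gen_movi_tl(cpu_pc, dest_pc);          /* set PC for the destination  */
    tcg_gen_exit_tb((tcg_target_long)tb + 0);  /* low bits tell cpu_exec which
                                                  slot to patch when chaining */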
* qemu_ld8u t0, t1, flags
qemu_ld8s t0, t1, flags
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index cf0ca3d..47612fe 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -145,12 +145,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 4;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -176,7 +170,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
so don't use these. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if TARGET_LONG_BITS == 64
/* If we're passing env to the helper as r0 and need a regpair
* for the address then r2 will be overwritten as we're setting
* up the args to the helper.
@@ -204,8 +198,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
use these. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_SOFTMMU) && \
- defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if defined(CONFIG_SOFTMMU) && (TARGET_LONG_BITS == 64)
/* Avoid clashes with registers being used for helper args */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
@@ -223,7 +216,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
#ifdef CONFIG_SOFTMMU
/* r2 is still needed to load data_reg, so don't use it. */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-#if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if TARGET_LONG_BITS == 64
/* Avoid clashes with registers being used for helper args */
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
#endif
@@ -342,7 +335,7 @@ enum arm_cond_code_e {
COND_AL = 0xe,
};
-static const uint8_t tcg_cond_to_arm_cond[10] = {
+static const uint8_t tcg_cond_to_arm_cond[] = {
[TCG_COND_EQ] = COND_EQ,
[TCG_COND_NE] = COND_NE,
[TCG_COND_LT] = COND_LT,
@@ -474,6 +467,21 @@ static inline void tcg_out_movi32(TCGContext *s,
}
}
+static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
+ TCGArg lhs, TCGArg rhs, int rhs_is_const)
+{
+ /* Emit either the reg,imm or reg,reg form of a data-processing insn.
+ * rhs must satisfy the "rI" constraint.
+ */
+ if (rhs_is_const) {
+ int rot = encode_imm(rhs);
+ assert(rot >= 0);
+ tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
+ } else {
+ tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
+ }
+}
+
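
(For context, the "rI" constraint accepts values that can be encoded as an
ARM rotated immediate. A standalone sketch of that test follows; the function
name is invented, and encode_imm() in this file performs the equivalent check
while also returning the rotation amount.)

    static int arm_imm_encodable(uint32_t v)
    {
        int rot;
        for (rot = 0; rot < 32; rot += 2) {
            /* rotate v left by rot bits (rot == 0 handled to avoid UB) */
            uint32_t r = rot ? ((v << rot) | (v >> (32 - rot))) : v;
            if ((r & ~0xffu) == 0) {
                return 1;   /* fits in 8 bits after an even rotation */
            }
        }
        return 0;
    }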
static inline void tcg_out_mul32(TCGContext *s,
int cond, int rd, int rs, int rm)
{
@@ -603,6 +611,22 @@ static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
}
}
+/* swap the two low bytes assuming that the two high input bytes and the
+ two high output bytes can hold any value. */
+static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
+{
+ if (use_armv6_instructions) {
+ /* rev16 */
+ tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
+ } else {
+ tcg_out_dat_reg(s, cond, ARITH_MOV,
+ TCG_REG_R8, 0, rn, SHIFT_IMM_LSR(8));
+ tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_R8, TCG_REG_R8, 0xff);
+ tcg_out_dat_reg(s, cond, ARITH_ORR,
+ rd, TCG_REG_R8, rn, SHIFT_IMM_LSL(8));
+ }
+}
+
static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
{
if (use_armv6_instructions) {
@@ -631,6 +655,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int cond,
(rn << 16) | (rd << 12) | ((-im) & 0xfff));
}
+/* Offset pre-increment with base writeback. */
+static inline void tcg_out_ld32_12wb(TCGContext *s, int cond,
+ int rd, int rn, tcg_target_long im)
+{
+ /* ldr with writeback where rd equals rn is UNPREDICTABLE */
+ assert(rd != rn);
+
+ if (im >= 0) {
+ tcg_out32(s, (cond << 28) | 0x05b00000 |
+ (rn << 16) | (rd << 12) | (im & 0xfff));
+ } else {
+ tcg_out32(s, (cond << 28) | 0x05300000 |
+ (rn << 16) | (rd << 12) | ((-im) & 0xfff));
+ }
+}
+
static inline void tcg_out_st32_12(TCGContext *s, int cond,
int rd, int rn, tcg_target_long im)
{
@@ -954,7 +994,6 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -972,25 +1011,6 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
/* Helper routines for marshalling helper function arguments into
* the correct registers and stack.
@@ -1083,7 +1103,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
- int mem_index, s_bits;
+ int mem_index, s_bits, tlb_offset;
TCGReg argreg;
# if TARGET_LONG_BITS == 64
int addr_reg2;
@@ -1123,19 +1143,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
- /* In the
- * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_read))]
- * below, the offset is likely to exceed 12 bits if mem_index != 0 and
- * not exceed otherwise, so use an
- * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
- * before.
- */
- if (mem_index)
+ /* We assume that the offset is contained within 20 bits. */
+ tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
+ assert((tlb_offset & ~0xfffff) == 0);
+ if (tlb_offset > 0xfff) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
- (mem_index << (TLB_SHIFT & 1)) |
- ((16 - (TLB_SHIFT >> 1)) << 8));
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_read));
+ 0xa00 | (tlb_offset >> 12));
+ tlb_offset &= 0xfff;
+ }
+ tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
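(Worked example with a made-up value: for tlb_offset = 0x12345 the add
immediate is 0xa00 | 0x12 = 0xa12, i.e. imm8 = 0x12 rotated right by 20,
which adds 0x12000; the remaining 0x345 fits in the 12-bit ldr offset, and
0x12000 + 0x345 == 0x12345.)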
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
/* Check alignment. */
@@ -1143,15 +1159,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
- /* XXX: possibly we could use a block data load or writeback in
- * the first access. */
- tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
+ /* XXX: possibly we could use a block data load in the first access. */
+ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addend));
+ offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_read));
switch (opc) {
case 0:
@@ -1203,9 +1218,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
* trash by moving the earlier arguments into them.
*/
argreg = TCG_REG_R0;
-#ifdef CONFIG_TCG_PASS_AREG0
argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#endif
#if TARGET_LONG_BITS == 64
argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
#else
@@ -1226,20 +1239,11 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
case 1:
case 2:
default:
- if (data_reg != TCG_REG_R0) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
- data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
- }
+ tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
break;
case 3:
- if (data_reg != TCG_REG_R0) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
- data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
- }
- if (data_reg2 != TCG_REG_R1) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
- data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
- }
+ tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
+ tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
break;
}
@@ -1311,7 +1315,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, data_reg, data_reg2, bswap;
#ifdef CONFIG_SOFTMMU
- int mem_index, s_bits;
+ int mem_index, s_bits, tlb_offset;
TCGReg argreg;
# if TARGET_LONG_BITS == 64
int addr_reg2;
@@ -1348,19 +1352,15 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
TCG_AREG0, TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
- /* In the
- * ldr r1 [r0, #(offsetof(CPUArchState, tlb_table[mem_index][0].addr_write))]
- * below, the offset is likely to exceed 12 bits if mem_index != 0 and
- * not exceed otherwise, so use an
- * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
- * before.
- */
- if (mem_index)
+ /* We assume that the offset is contained within 20 bits. */
+ tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
+ assert((tlb_offset & ~0xfffff) == 0);
+ if (tlb_offset > 0xfff) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
- (mem_index << (TLB_SHIFT & 1)) |
- ((16 - (TLB_SHIFT >> 1)) << 8));
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_write));
+ 0xa00 | (tlb_offset >> 12));
+ tlb_offset &= 0xfff;
+ }
+ tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
/* Check alignment. */
@@ -1368,15 +1368,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
0, addr_reg, (1 << s_bits) - 1);
# if TARGET_LONG_BITS == 64
- /* XXX: possibly we could use a block data load or writeback in
- * the first access. */
- tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
+ /* XXX: possibly we could use a block data load in the first access. */
+ tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
# endif
tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
- offsetof(CPUArchState, tlb_table[0][0].addend));
+ offsetof(CPUTLBEntry, addend)
+ - offsetof(CPUTLBEntry, addr_write));
switch (opc) {
case 0:
@@ -1384,7 +1383,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
break;
case 1:
if (bswap) {
- tcg_out_bswap16(s, COND_EQ, TCG_REG_R0, data_reg);
+ tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1);
} else {
tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1);
@@ -1421,9 +1420,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
* trash by moving the earlier arguments into them.
*/
argreg = TCG_REG_R0;
-#ifdef CONFIG_TCG_PASS_AREG0
argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#endif
#if TARGET_LONG_BITS == 64
argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
#else
@@ -1472,7 +1469,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
break;
case 1:
if (bswap) {
- tcg_out_bswap16(s, COND_AL, TCG_REG_R0, data_reg);
+ tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
} else {
tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
@@ -1561,12 +1558,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
else
tcg_out_callr(s, COND_AL, args[0]);
break;
- case INDEX_op_jmp:
- if (const_args[0])
- tcg_out_goto(s, COND_AL, args[0]);
- else
- tcg_out_bx(s, COND_AL, args[0]);
- break;
case INDEX_op_br:
tcg_out_goto_label(s, COND_AL, args[0]);
break;
@@ -1603,6 +1594,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_movi_i32:
tcg_out_movi32(s, COND_AL, args[0], args[1]);
break;
+ case INDEX_op_movcond_i32:
+ /* Constraints mean that v2 is always in the same register as dest,
+ * so we only need to do "if condition passed, move v1 to dest".
+ */
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+ args[1], args[2], const_args[2]);
+ tcg_out_dat_rI(s, tcg_cond_to_arm_cond[args[5]],
+ ARITH_MOV, args[0], 0, args[3], const_args[3]);
+ break;
case INDEX_op_add_i32:
c = ARITH_ADD;
goto gen_arith;
@@ -1622,14 +1622,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = ARITH_EOR;
/* Fall through. */
gen_arith:
- if (const_args[2]) {
- int rot;
- rot = encode_imm(args[2]);
- tcg_out_dat_imm(s, COND_AL, c,
- args[0], args[1], rotl(args[2], rot) | (rot << 7));
- } else
- tcg_out_dat_reg(s, COND_AL, c,
- args[0], args[1], args[2], SHIFT_IMM_LSL(0));
+ tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
break;
case INDEX_op_add2_i32:
tcg_out_dat_reg2(s, COND_AL, ARITH_ADD, ARITH_ADC,
@@ -1689,15 +1682,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_brcond_i32:
- if (const_args[1]) {
- int rot;
- rot = encode_imm(args[1]);
- tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
- args[0], rotl(args[1], rot) | (rot << 7));
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
- args[0], args[1], SHIFT_IMM_LSL(0));
- }
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+ args[0], args[1], const_args[1]);
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
break;
case INDEX_op_brcond2_i32:
@@ -1716,15 +1702,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
break;
case INDEX_op_setcond_i32:
- if (const_args[2]) {
- int rot;
- rot = encode_imm(args[2]);
- tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0,
- args[1], rotl(args[2], rot) | (rot << 7));
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0,
- args[1], args[2], SHIFT_IMM_LSL(0));
- }
+ tcg_out_dat_rI(s, COND_AL, ARITH_CMP, 0,
+ args[1], args[2], const_args[2]);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
ARITH_MOV, args[0], 0, 1);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
@@ -1800,7 +1779,6 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
@@ -1835,6 +1813,7 @@ static const TCGTargetOpDef arm_op_defs[] = {
{ INDEX_op_brcond_i32, { "r", "rI" } },
{ INDEX_op_setcond_i32, { "r", "r", "rI" } },
+ { INDEX_op_movcond_i32, { "r", "r", "rI", "rI", "0" } },
/* TODO: "r", "r", "r", "r", "ri", "ri" */
{ INDEX_op_add2_i32, { "r", "r", "r", "r", "r", "r" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index f90b834..98fa11b 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,11 +73,9 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
-
-#define TCG_TARGET_HAS_GUEST_BASE
+#define TCG_TARGET_HAS_movcond_i32 1
enum {
- /* Note: must be synced with dyngen-exec.h */
TCG_AREG0 = TCG_REG_R6,
};
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 2885212..de500ae 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -175,12 +175,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
*insn_ptr = insn;
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 4;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -738,7 +732,7 @@ static void tcg_out_branch(TCGContext *s, int label_index, int nul)
}
}
-static const uint8_t tcg_cond_to_cmp_cond[10] =
+static const uint8_t tcg_cond_to_cmp_cond[] =
{
[TCG_COND_EQ] = COND_EQ,
[TCG_COND_NE] = COND_EQ | COND_FALSE,
@@ -826,13 +820,15 @@ static void tcg_out_brcond2(TCGContext *s, int cond, TCGArg al, TCGArg ah,
{
switch (cond) {
case TCG_COND_EQ:
+ tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, al, bl, blconst);
+ tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index);
+ break;
case TCG_COND_NE:
- tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst);
- tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+ tcg_out_brcond(s, TCG_COND_NE, al, bl, blconst, label_index);
+ tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index);
break;
-
default:
- tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+ tcg_out_brcond(s, tcg_high_cond(cond), ah, bh, bhconst, label_index);
tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
tcg_out_brcond(s, tcg_unsigned_cond(cond),
al, bl, blconst, label_index);
@@ -853,9 +849,8 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
{
int scratch = TCG_REG_R20;
- if (ret != al && ret != ah
- && (blconst || ret != bl)
- && (bhconst || ret != bh)) {
+ /* Note that the low parts are fully consumed before scratch is set. */
+ if (ret != ah && (bhconst || ret != bh)) {
scratch = ret;
}
@@ -867,22 +862,52 @@ static void tcg_out_setcond2(TCGContext *s, int cond, TCGArg ret,
tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
break;
- default:
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ case TCG_COND_LT:
+ case TCG_COND_LTU:
+ /* Optimize compares with low part zero. */
+ if (bl == 0) {
+ tcg_out_setcond(s, cond, ret, ah, bh, bhconst);
+ return;
+ }
+ /* FALLTHRU */
+
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ case TCG_COND_GT:
+ case TCG_COND_GTU:
+ /* <= : ah < bh | (ah == bh && al <= bl) */
tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
- tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
+ tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond(cond)),
+ TCG_REG_R0, ah, bh, bhconst);
tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
break;
+
+ default:
+ tcg_abort();
}
tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
}
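
A plain-C reference model of the double-word comparison built here (a sketch,
shown for the unsigned <= case to match the "ah < bh | (ah == bh && al <= bl)"
comment above):

    static int setcond2_leu_ref(uint32_t al, uint32_t ah,
                                uint32_t bl, uint32_t bh)
    {
        if (ah != bh) {
            return ah < bh;   /* high halves differ: they alone decide     */
        }
        return al <= bl;      /* high halves equal: unsigned low-half test */
    }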
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret,
+ TCGArg c1, TCGArg c2, int c2const,
+ TCGArg v1, int v1const)
+{
+ tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const);
+ if (v1const) {
+ tcg_out_movi(s, TCG_TYPE_I32, ret, v1);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, ret, v1);
+ }
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -900,25 +925,6 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
/* Load and compare a TLB entry, and branch if TLB miss. OFFSET is set to
the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -963,10 +969,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
}
- /* Compute the value that ought to appear in the TLB for a hit, namely, the page
- of the address. We include the low N bits of the address to catch unaligned
- accesses and force them onto the slow path. Do this computation after having
- issued the load from the TLB slot to give the load time to complete. */
+ /* Compute the value that ought to appear in the TLB for a hit, namely,
+ the page of the address. We include the low N bits of the address
+ to catch unaligned accesses and force them onto the slow path. Do
+ this computation after having issued the load from the TLB slot to
+ give the load time to complete. */
tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
/* If not equal, jump to lab_miss. */
@@ -979,6 +986,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
return ret;
}
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+ if (argno < 4) {
+ if (vconst) {
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+ }
+ } else {
+ if (vconst && v != 0) {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+ v = TCG_REG_R20;
+ }
+ tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+ TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+ }
+ return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+ /* 64-bit arguments must go in even reg pairs and stack slots. */
+ if (argno & 1) {
+ argno++;
+ }
+ argno = tcg_out_arg_reg32(s, argno, vl, false);
+ argno = tcg_out_arg_reg32(s, argno, vh, false);
+ return argno;
+}
#endif
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1059,41 +1096,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
- int lab1, lab2, argreg, offset;
+ int lab1, lab2, argno, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
- offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
- opc & 3, lab1, offset);
+ offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+ addrhi_reg, opc & 3, lab1, offset);
/* TLB Hit. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+ (offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
- tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+ tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+ TCG_REG_R20, opc);
tcg_out_branch(s, lab2, 1);
/* TLB Miss. */
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
- argreg = TCG_REG_R26;
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+ argno = 0;
+ argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+ argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+ } else {
+ argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-#endif
+ argno = tcg_out_arg_reg32(s, argno, mem_index, true);
+
tcg_out_call(s, qemu_ld_helpers[opc & 3]);
switch (opc) {
@@ -1129,8 +1161,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#endif
}
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
- int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+ int datahi_reg, int addr_reg, int opc)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 0;
@@ -1183,17 +1215,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
- int lab1, lab2, argreg, offset;
+ int lab1, lab2, argno, next, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
- offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
- opc, lab1, offset);
+ offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+ addrhi_reg, opc, lab1, offset);
/* TLB Hit. */
- tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+ (offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
/* There are no indexed stores, so we must do this addition explicitly.
@@ -1206,65 +1239,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
- argreg = TCG_REG_R26;
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+ argno = 0;
+ argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+ argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+ } else {
+ argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
+ next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
switch(opc) {
case 0:
- tcg_out_andi(s, argreg--, datalo_reg, 0xff);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ tcg_out_andi(s, next, datalo_reg, 0xff);
+ argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 1:
- tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ tcg_out_andi(s, next, datalo_reg, 0xffff);
+ argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 2:
- tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+ argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
break;
case 3:
- /* Because of the alignment required by the 64-bit data argument,
- we will always use R23/R24. Also, we will always run out of
- argument registers for storing mem_index, so that will have
- to go on the stack. */
- if (mem_index == 0) {
- argreg = TCG_REG_R0;
- } else {
- argreg = TCG_REG_R20;
- tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
- }
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
- tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
- tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - 4);
+ argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
break;
default:
tcg_abort();
}
+ argno = tcg_out_arg_reg32(s, argno, mem_index, true);
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-#endif
tcg_out_call(s, qemu_st_helpers[opc]);
/* label2: */
tcg_out_label(s, lab2, s->code_ptr);
#else
- /* There are no indexed stores, so if GUEST_BASE is set we must do the add
- explicitly. Careful to avoid R20, which is used for the bswaps to follow. */
+ /* There are no indexed stores, so if GUEST_BASE is set we must do
+ the add explicitly. Careful to avoid R20, which is used for the
+ bswaps to follow. */
if (GUEST_BASE != 0) {
- tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+ tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+ TCG_GUEST_BASE_REG, INSN_ADDL);
addrlo_reg = TCG_REG_R31;
}
tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
@@ -1326,11 +1340,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
break;
- case INDEX_op_jmp:
- fprintf(stderr, "unimplemented jmp\n");
- tcg_abort();
- break;
-
case INDEX_op_br:
tcg_out_branch(s, args[0], 1);
break;
@@ -1499,6 +1508,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[3], const_args[3], args[4], const_args[4]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2],
+ args[3], const_args[3]);
+ break;
+
case INDEX_op_add2_i32:
tcg_out_add2(s, args[0], args[1], args[2], args[3],
args[4], args[5], const_args[4]);
@@ -1560,7 +1574,6 @@ static const TCGTargetOpDef hppa_op_defs[] = {
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "r" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
@@ -1607,6 +1620,10 @@ static const TCGTargetOpDef hppa_op_defs[] = {
{ INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
{ INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
+ /* ??? We can actually support a signed 14-bit arg3, but we
+ only have existing constraints for a signed 11-bit. */
+ { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } },
+
{ INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
{ INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } },
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index d4bf6fe..f43fb41 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -96,15 +96,13 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, 0, rs */
#define TCG_TARGET_HAS_ext8u_i32 0 /* and rd, rs, 0xff */
#define TCG_TARGET_HAS_ext16u_i32 0 /* and rd, rs, 0xffff */
-#define TCG_TARGET_HAS_GUEST_BASE
-
-/* Note: must be synced with dyngen-exec.h */
#define TCG_AREG0 TCG_REG_R17
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index da17bba..6f3ad3c 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -75,9 +75,7 @@ static const int tcg_target_call_iarg_regs[] = {
TCG_REG_R8,
TCG_REG_R9,
#else
- TCG_REG_EAX,
- TCG_REG_EDX,
- TCG_REG_ECX
+ /* 32 bit mode uses stack based calling convention (GCC default). */
#endif
};
@@ -88,6 +86,17 @@ static const int tcg_target_call_oarg_regs[] = {
#endif
};
+/* Registers used with L constraint, which are the first argument
+ registers on x86_64, and two random call clobbered registers on
+ i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
static uint8_t *tb_ret_addr;
static void patch_reloc(uint8_t *code_ptr, int type,
@@ -114,16 +123,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- if (TCG_TARGET_REG_BITS == 64) {
- return 6;
- }
-
- return 0;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -179,18 +178,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
/* qemu_ld/st address constraint */
case 'L':
ct->ct |= TCG_CT_REG;
- if (TCG_TARGET_REG_BITS == 64) {
+#if TCG_TARGET_REG_BITS == 64
tcg_regset_set32(ct->u.regs, 0, 0xffff);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
-#endif
- } else {
+#else
tcg_regset_set32(ct->u.regs, 0, 0xff);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
- }
+#endif
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
break;
case 'e':
@@ -238,11 +232,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
# define P_REXW 0x800 /* Set REX.W = 1 */
# define P_REXB_R 0x1000 /* REG field as byte register */
# define P_REXB_RM 0x2000 /* R/M field as byte register */
+# define P_GS 0x4000 /* gs segment override */
#else
# define P_ADDR32 0
# define P_REXW 0
# define P_REXB_R 0
# define P_REXB_RM 0
+# define P_GS 0
#endif
#define OPC_ARITH_EvIz (0x81)
@@ -251,6 +247,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_ADD_GvEv (OPC_ARITH_GvEv | (ARITH_ADD << 3))
#define OPC_BSWAP (0xc8 | P_EXT)
#define OPC_CALL_Jz (0xe8)
+#define OPC_CMOVCC (0x40 | P_EXT) /* ... plus condition code */
#define OPC_CMP_GvEv (OPC_ARITH_GvEv | (ARITH_CMP << 3))
#define OPC_DEC_r32 (0x48)
#define OPC_IMUL_GvEv (0xaf | P_EXT)
@@ -265,6 +262,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
+#define OPC_MOVB_EvIz (0xc6)
#define OPC_MOVL_EvIz (0xc7)
#define OPC_MOVL_Iv (0xb8)
#define OPC_MOVSBL (0xbe | P_EXT)
@@ -338,7 +336,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define JCC_JLE 0xe
#define JCC_JG 0xf
-static const uint8_t tcg_cond_to_jcc[10] = {
+static const uint8_t tcg_cond_to_jcc[] = {
[TCG_COND_EQ] = JCC_JE,
[TCG_COND_NE] = JCC_JNE,
[TCG_COND_LT] = JCC_JL,
@@ -356,6 +354,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
{
int rex;
+ if (opc & P_GS) {
+ tcg_out8(s, 0x65);
+ }
if (opc & P_DATA16) {
/* We should never be asking for both 16 and 64-bit operation. */
assert((opc & P_REXW) == 0);
@@ -937,6 +938,24 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args,
}
#endif
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg c1, TCGArg c2, int const_c2,
+ TCGArg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, 0);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+ TCGArg c1, TCGArg c2, int const_c2,
+ TCGArg v1)
+{
+ tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+ tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+}
+#endif
+
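
The cmov only writes dest when the condition holds, so the sequence relies on
the register allocator having placed v2 in dest beforehand (the ARM and hppa
backends in this patch arrange that with a "0" constraint; presumably the x86
constraint list does the same). A plain-C sketch of the resulting behaviour:

    static uint32_t cmov_movcond_ref(int cond_holds, uint32_t v1,
                                     uint32_t dest /* pre-loaded with v2 */)
    {
        if (cond_holds) {
            dest = v1;   /* cmovcc dest, v1 */
        }
        return dest;     /* otherwise dest keeps v2 */
    }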
static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
{
tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
@@ -965,7 +984,6 @@ static void tcg_out_jmp(TCGContext *s, tcg_target_long dest)
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void *qemu_ld_helpers[4] = {
@@ -983,25 +1001,17 @@ static const void *qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
+static void add_qemu_ldst_label(TCGContext *s,
+ int is_ld,
+ int opc,
+ int data_reg,
+ int data_reg2,
+ int addrlo_reg,
+ int addrhi_reg,
+ int mem_index,
+ uint8_t *raddr,
+ uint8_t **label_ptr);
/* Perform the TLB load and compare.
@@ -1018,12 +1028,12 @@ static void *qemu_st_helpers[4] = {
LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
positions of the displacements of forward jumps to the TLB miss case.
- First argument register is loaded with the low part of the address.
+ Second argument register is loaded with the low part of the address.
In the TLB hit case, it has been adjusted as indicated by the TLB
and so is a host address. In the TLB miss case, it continues to
hold a guest address.
- Second argument register is clobbered. */
+ First argument register is clobbered. */
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
int mem_index, int s_bits,
@@ -1031,8 +1041,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
uint8_t **label_ptr, int which)
{
const int addrlo = args[addrlo_idx];
- const int r0 = tcg_target_call_iarg_regs[0];
- const int r1 = tcg_target_call_iarg_regs[1];
+ const int r0 = TCG_REG_L0;
+ const int r1 = TCG_REG_L1;
TCGType type = TCG_TYPE_I32;
int rexw = 0;
@@ -1041,51 +1051,68 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
rexw = P_REXW;
}
- tcg_out_mov(s, type, r1, addrlo);
tcg_out_mov(s, type, r0, addrlo);
+ tcg_out_mov(s, type, r1, addrlo);
- tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
+ tcg_out_shifti(s, SHIFT_SHR + rexw, r0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tgen_arithi(s, ARITH_AND + rexw, r0,
- TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
tgen_arithi(s, ARITH_AND + rexw, r1,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
+ tgen_arithi(s, ARITH_AND + rexw, r0,
(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
- tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
+ tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0,
offsetof(CPUArchState, tlb_table[mem_index][0])
+ which);
- /* cmp 0(r1), r0 */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
+ /* cmp 0(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0);
- tcg_out_mov(s, type, r0, addrlo);
+ tcg_out_mov(s, type, r1, addrlo);
- /* jne label1 */
- tcg_out8(s, OPC_JCC_short + JCC_JNE);
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
label_ptr[0] = s->code_ptr;
- s->code_ptr++;
+ s->code_ptr += 4;
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- /* cmp 4(r1), addrhi */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
+ /* cmp 4(r0), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
- /* jne label1 */
- tcg_out8(s, OPC_JCC_short + JCC_JNE);
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
label_ptr[1] = s->code_ptr;
- s->code_ptr++;
+ s->code_ptr += 4;
}
/* TLB Hit. */
- /* add addend(r1), r0 */
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
+ /* add addend(r0), r1 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
offsetof(CPUTLBEntry, addend) - which);
}
-#endif
+#elif defined(__x86_64__) && defined(__linux__)
+# include <asm/prctl.h>
+# include <sys/prctl.h>
+
+int arch_prctl(int code, unsigned long addr);
+
+static int guest_base_flags;
+static inline void setup_guest_base_seg(void)
+{
+ if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) {
+ guest_base_flags = P_GS;
+ }
+}
+#else
+# define guest_base_flags 0
+static inline void setup_guest_base_seg(void) { }
+#endif /* SOFTMMU */
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1094,28 +1121,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#endif
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
break;
case 0 | 4:
- tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
break;
case 1:
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
if (bswap) {
tcg_out_rolw_8(s, datalo);
}
break;
case 1 | 4:
if (bswap) {
- tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
tcg_out_rolw_8(s, datalo);
tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg,
+ datalo, base, ofs);
}
break;
case 2:
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
if (bswap) {
tcg_out_bswap32(s, datalo);
}
@@ -1123,17 +1151,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
#if TCG_TARGET_REG_BITS == 64
case 2 | 4:
if (bswap) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
tcg_out_bswap32(s, datalo);
tcg_out_ext32s(s, datalo, datalo);
} else {
- tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs);
}
break;
#endif
case 3:
if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
+ datalo, base, ofs);
if (bswap) {
tcg_out_bswap64(s, datalo);
}
@@ -1144,11 +1173,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
datahi = t;
}
if (base != datalo) {
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
} else {
- tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4);
- tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg,
+ datalo, base, ofs);
}
if (bswap) {
tcg_out_bswap32(s, datalo);
@@ -1171,12 +1204,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
int mem_index, s_bits;
-#if TCG_TARGET_REG_BITS == 64
- int arg_idx;
-#else
- int stack_adjust;
-#endif
- uint8_t *label_ptr[3];
+ uint8_t *label_ptr[2];
#endif
data_reg = args[0];
@@ -1194,133 +1222,48 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_read));
/* TLB Hit. */
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
- tcg_target_call_iarg_regs[0], 0, opc);
-
- /* jmp label2 */
- tcg_out8(s, OPC_JMP_short);
- label_ptr[2] = s->code_ptr;
- s->code_ptr++;
-
- /* TLB Miss. */
-
- /* label1: */
- *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
- }
-
- /* XXX: move that code at the end of the TB */
-#if TCG_TARGET_REG_BITS == 32
- tcg_out_pushi(s, mem_index);
- stack_adjust = 4;
- if (TARGET_LONG_BITS == 64) {
- tcg_out_push(s, args[addrlo_idx + 1]);
- stack_adjust += 4;
- }
- tcg_out_push(s, args[addrlo_idx]);
- stack_adjust += 4;
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_push(s, TCG_AREG0);
- stack_adjust += 4;
-#endif
-#else
- /* The first argument is already loaded with addrlo. */
- arg_idx = 1;
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
- mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-#endif
-#endif
-
- tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
-
-#if TCG_TARGET_REG_BITS == 32
- if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
- /* Pop and discard. This is 2 bytes smaller than the add. */
- tcg_out_pop(s, TCG_REG_ECX);
- } else if (stack_adjust != 0) {
- tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
- }
-#endif
-
- switch(opc) {
- case 0 | 4:
- tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
- break;
- case 1 | 4:
- tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
- break;
- case 0:
- tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
- break;
- case 1:
- tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
- break;
- case 2:
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
- break;
-#if TCG_TARGET_REG_BITS == 64
- case 2 | 4:
- tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
- break;
-#endif
- case 3:
- if (TCG_TARGET_REG_BITS == 64) {
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
- } else if (data_reg == TCG_REG_EDX) {
- /* xchg %edx, %eax */
- tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
- tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
- } else {
- tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
- tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
- }
- break;
- default:
- tcg_abort();
- }
-
- /* label2: */
- *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
+
+ /* Record the current context of a load into ldst label */
+ add_qemu_ldst_label(s,
+ 1,
+ opc,
+ data_reg,
+ data_reg2,
+ args[addrlo_idx],
+ args[addrlo_idx + 1],
+ mem_index,
+ s->code_ptr,
+ label_ptr);
#else
{
int32_t offset = GUEST_BASE;
int base = args[addrlo_idx];
-
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64,
- tcg_target_call_iarg_regs[0], GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW,
- tcg_target_call_iarg_regs[0], base);
- base = tcg_target_call_iarg_regs[0];
- offset = 0;
- }
+ int seg = 0;
+
+ /* ??? We assume all operations have left us with register contents
+ that are zero extended. So far this appears to be true. If we
+ want to enforce this, we can either do an explicit zero-extension
+ here, or (if GUEST_BASE == 0, or a segment register is in use)
+ use the ADDR32 prefix. For now, do nothing. */
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+ base = TCG_REG_L1;
+ offset = 0;
}
- tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc);
+ tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc);
}
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
- int base, tcg_target_long ofs, int sizeop)
+ int base, tcg_target_long ofs, int seg,
+ int sizeop)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 1;
@@ -1330,12 +1273,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
/* ??? Ideally we wouldn't need a scratch register. For user-only,
we could perform the bswap twice to restore the original value
instead of moving to the scratch. But as it is, the L constraint
- means that the second argument reg is definitely free here. */
- int scratch = tcg_target_call_iarg_regs[1];
+ means that TCG_REG_L0 is definitely free here. */
+ const int scratch = TCG_REG_L0;
switch (sizeop) {
case 0:
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
+ datalo, base, ofs);
break;
case 1:
if (bswap) {
@@ -1343,7 +1287,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_rolw_8(s, scratch);
datalo = scratch;
}
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg,
+ datalo, base, ofs);
break;
case 2:
if (bswap) {
@@ -1351,7 +1296,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap32(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
break;
case 3:
if (TCG_TARGET_REG_BITS == 64) {
@@ -1360,17 +1305,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
tcg_out_bswap64(s, scratch);
datalo = scratch;
}
- tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg,
+ datalo, base, ofs);
} else if (bswap) {
tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs);
tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
tcg_out_bswap32(s, scratch);
- tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4);
} else {
- tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs);
- tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs);
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4);
}
break;
default:
@@ -1385,8 +1331,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
int addrlo_idx;
#if defined(CONFIG_SOFTMMU)
int mem_index, s_bits;
- int stack_adjust;
- uint8_t *label_ptr[3];
+ uint8_t *label_ptr[2];
#endif
data_reg = args[0];
@@ -1404,23 +1349,223 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
label_ptr, offsetof(CPUTLBEntry, addr_write));
/* TLB Hit. */
- tcg_out_qemu_st_direct(s, data_reg, data_reg2,
- tcg_target_call_iarg_regs[0], 0, opc);
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc);
+
+ /* Record the current context of a store into ldst label */
+ add_qemu_ldst_label(s,
+ 0,
+ opc,
+ data_reg,
+ data_reg2,
+ args[addrlo_idx],
+ args[addrlo_idx + 1],
+ mem_index,
+ s->code_ptr,
+ label_ptr);
+#else
+ {
+ int32_t offset = GUEST_BASE;
+ int base = args[addrlo_idx];
+ int seg = 0;
+
+ /* ??? We assume all operations have left us with register contents
+ that are zero extended. So far this appears to be true. If we
+ want to enforce this, we can either do an explicit zero-extension
+ here, or (if GUEST_BASE == 0, or a segment register is in use)
+ use the ADDR32 prefix. For now, do nothing. */
+ if (GUEST_BASE && guest_base_flags) {
+ seg = guest_base_flags;
+ offset = 0;
+ } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
+ tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
+ base = TCG_REG_L1;
+ offset = 0;
+ }
+
+ tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc);
+ }
+#endif
+}
+
+#if defined(CONFIG_SOFTMMU)
+/*
+ * Record the context of a call to the out of line helper code for the slow path
+ * for a load or store, so that we can later generate the correct helper code
+ */
+static void add_qemu_ldst_label(TCGContext *s,
+ int is_ld,
+ int opc,
+ int data_reg,
+ int data_reg2,
+ int addrlo_reg,
+ int addrhi_reg,
+ int mem_index,
+ uint8_t *raddr,
+ uint8_t **label_ptr)
+{
+ int idx;
+ TCGLabelQemuLdst *label;
+
+ if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
+ tcg_abort();
+ }
+
+ idx = s->nb_qemu_ldst_labels++;
+ label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
+ label->is_ld = is_ld;
+ label->opc = opc;
+ label->datalo_reg = data_reg;
+ label->datahi_reg = data_reg2;
+ label->addrlo_reg = addrlo_reg;
+ label->addrhi_reg = addrhi_reg;
+ label->mem_index = mem_index;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr[0];
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ label->label_ptr[1] = label_ptr[1];
+ }
+}
+
+/*
+ * Generate code for the slow path for a load at the end of block
+ */
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
+{
+ int s_bits;
+ int opc = label->opc;
+ int mem_index = label->mem_index;
+#if TCG_TARGET_REG_BITS == 32
+ int stack_adjust;
+ int addrlo_reg = label->addrlo_reg;
+ int addrhi_reg = label->addrhi_reg;
+#endif
+ int data_reg = label->datalo_reg;
+ int data_reg2 = label->datahi_reg;
+ uint8_t *raddr = label->raddr;
+ uint8_t **label_ptr = &label->label_ptr[0];
+
+ s_bits = opc & 3;
+
+ /* resolve label address */
+ *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+ }
+
+#if TCG_TARGET_REG_BITS == 32
+ tcg_out_pushi(s, mem_index);
+ stack_adjust = 4;
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_push(s, addrhi_reg);
+ stack_adjust += 4;
+ }
+ tcg_out_push(s, addrlo_reg);
+ stack_adjust += 4;
+ tcg_out_push(s, TCG_AREG0);
+ stack_adjust += 4;
+#else
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+#endif
+
+ /* Code generation of qemu_ld/st's slow path calling MMU helper
+
+ PRE_PROC ...
+ call MMU helper
+ jmp POST_PROC (2b) : short forward jump <- GETRA()
+ jmp next_code (5b) : dummy long backward jump which is never executed
+ POST_PROC ... : do post-processing <- GETRA() + 7
+ jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
+ */
- /* jmp label2 */
+ tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
+
+ /* Jump to post-processing code */
tcg_out8(s, OPC_JMP_short);
- label_ptr[2] = s->code_ptr;
- s->code_ptr++;
+ tcg_out8(s, 5);
+    /* Dummy backward jump carrying the fast path's pc for the MMU helpers */
+ tcg_out8(s, OPC_JMP_long);
+ *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
+ s->code_ptr += 4;
+
+#if TCG_TARGET_REG_BITS == 32
+ if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
+ /* Pop and discard. This is 2 bytes smaller than the add. */
+ tcg_out_pop(s, TCG_REG_ECX);
+ } else if (stack_adjust != 0) {
+ tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
+ }
+#endif
+
+    switch (opc) {
+ case 0 | 4:
+ tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case 1 | 4:
+ tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
+ break;
+ case 0:
+ tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
+ break;
+ case 1:
+ tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
+ break;
+ case 2:
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ break;
+#if TCG_TARGET_REG_BITS == 64
+ case 2 | 4:
+ tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
+ break;
+#endif
+ case 3:
+ if (TCG_TARGET_REG_BITS == 64) {
+ tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
+ } else if (data_reg == TCG_REG_EDX) {
+ /* xchg %edx, %eax */
+ tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EAX);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
+ tcg_out_mov(s, TCG_TYPE_I32, data_reg2, TCG_REG_EDX);
+ }
+ break;
+ default:
+ tcg_abort();
+ }
- /* TLB Miss. */
+    /* Jump to the code corresponding to the IR that follows the qemu_ld */
+ tcg_out_jmp(s, (tcg_target_long)raddr);
+}
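
The short-jump/long-jump trailer emitted above is what lets the MMU helpers
recover the fast path's pc from nothing but their own return address. A
minimal sketch of the decoding, assuming GETRA() yields the address of the
2-byte short jmp placed right after the call (fast_path_pc is illustrative
only, not part of this patch):

#include <stdint.h>
#include <string.h>

static uintptr_t fast_path_pc(uintptr_t ra)
{
    /* Layout at ra:  EB 05           jmp short, skips the next 5 bytes
                      E9 xx xx xx xx  jmp rel32, never executed
       The rel32 at ra + 3 was stored as raddr - (ra + 3) - 4, so the
       qemu_ld/st's continuation address is recovered relative to ra + 7. */
    int32_t rel;
    memcpy(&rel, (const void *)(ra + 3), sizeof(rel));
    return ra + 7 + (intptr_t)rel;
}

This matches the POST_PROC <- GETRA() + 7 annotation in the comment block
above: the post-processing code starts immediately after the dummy jump.
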
- /* label1: */
- *label_ptr[0] = s->code_ptr - label_ptr[0] - 1;
+/*
+ * Generate code for the slow path of a store at the end of the block
+ */
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *label)
+{
+ int s_bits;
+ int stack_adjust;
+ int opc = label->opc;
+ int mem_index = label->mem_index;
+ int data_reg = label->datalo_reg;
+#if TCG_TARGET_REG_BITS == 32
+ int data_reg2 = label->datahi_reg;
+ int addrlo_reg = label->addrlo_reg;
+ int addrhi_reg = label->addrhi_reg;
+#endif
+ uint8_t *raddr = label->raddr;
+ uint8_t **label_ptr = &label->label_ptr[0];
+
+ s_bits = opc & 3;
+
+ /* resolve label address */
+ *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- *label_ptr[1] = s->code_ptr - label_ptr[1] - 1;
+ *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
}
- /* XXX: move that code at the end of the TB */
#if TCG_TARGET_REG_BITS == 32
tcg_out_pushi(s, mem_index);
stack_adjust = 4;
@@ -1431,35 +1576,42 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
tcg_out_push(s, data_reg);
stack_adjust += 4;
if (TARGET_LONG_BITS == 64) {
- tcg_out_push(s, args[addrlo_idx + 1]);
+ tcg_out_push(s, addrhi_reg);
stack_adjust += 4;
}
- tcg_out_push(s, args[addrlo_idx]);
+ tcg_out_push(s, addrlo_reg);
stack_adjust += 4;
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_out_push(s, TCG_AREG0);
stack_adjust += 4;
-#endif
#else
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ /* The second argument is already loaded with addrlo. */
tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
- tcg_target_call_iarg_regs[1], data_reg);
- tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+ tcg_target_call_iarg_regs[2], data_reg);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
stack_adjust = 0;
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-#endif
#endif
+ /* Code generation of qemu_ld/st's slow path calling MMU helper
+
+ PRE_PROC ...
+ call MMU helper
+ jmp POST_PROC (2b) : short forward jump <- GETRA()
+ jmp next_code (5b) : dummy long backward jump which is never executed
+ POST_PROC ... : do post-processing <- GETRA() + 7
+ jmp next_code : jump to the code corresponding to next IR of qemu_ld/st
+ */
+
tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
+ /* Jump to post-processing code */
+ tcg_out8(s, OPC_JMP_short);
+ tcg_out8(s, 5);
+    /* Dummy backward jump carrying the fast path's pc for the MMU helpers */
+ tcg_out8(s, OPC_JMP_long);
+ *(int32_t *)s->code_ptr = (int32_t)(raddr - s->code_ptr - 4);
+ s->code_ptr += 4;
+
if (stack_adjust == (TCG_TARGET_REG_BITS / 8)) {
/* Pop and discard. This is 2 bytes smaller than the add. */
tcg_out_pop(s, TCG_REG_ECX);
@@ -1467,34 +1619,29 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
tcg_out_addi(s, TCG_REG_CALL_STACK, stack_adjust);
}
- /* label2: */
- *label_ptr[2] = s->code_ptr - label_ptr[2] - 1;
-#else
- {
- int32_t offset = GUEST_BASE;
- int base = args[addrlo_idx];
+    /* Jump to the code corresponding to the IR that follows the qemu_st */
+ tcg_out_jmp(s, (tcg_target_long)raddr);
+}
- if (TCG_TARGET_REG_BITS == 64) {
- /* ??? We assume all operations have left us with register
- contents that are zero extended. So far this appears to
- be true. If we want to enforce this, we can either do
- an explicit zero-extension here, or (if GUEST_BASE == 0)
- use the ADDR32 prefix. For now, do nothing. */
-
- if (offset != GUEST_BASE) {
- tcg_out_movi(s, TCG_TYPE_I64,
- tcg_target_call_iarg_regs[0], GUEST_BASE);
- tgen_arithr(s, ARITH_ADD + P_REXW,
- tcg_target_call_iarg_regs[0], base);
- base = tcg_target_call_iarg_regs[0];
- offset = 0;
- }
+/*
+ * Generate TB finalization at the end of the block
+ */
+void tcg_out_tb_finalize(TCGContext *s)
+{
+ int i;
+ TCGLabelQemuLdst *label;
+
+ /* qemu_ld/st slow paths */
+ for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
+ label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[i];
+ if (label->is_ld) {
+ tcg_out_qemu_ld_slow_path(s, label);
+ } else {
+ tcg_out_qemu_st_slow_path(s, label);
}
-
- tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc);
}
-#endif
}
+#endif /* CONFIG_SOFTMMU */
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
@@ -1537,14 +1684,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
}
break;
- case INDEX_op_jmp:
- if (const_args[0]) {
- tcg_out_jmp(s, args[0]);
- } else {
- /* jmp *reg */
- tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
- }
- break;
case INDEX_op_br:
tcg_out_jxx(s, JCC_JMP, args[0], 0);
break;
@@ -1573,18 +1712,35 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
OP_32_64(st8):
- tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
- args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
+ 0, args[1], args[2]);
+ tcg_out8(s, args[0]);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+ args[0], args[1], args[2]);
+ }
break;
OP_32_64(st16):
- tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
- args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
+ 0, args[1], args[2]);
+ tcg_out16(s, args[0]);
+ } else {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+ args[0], args[1], args[2]);
+ }
break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_st32_i64:
#endif
case INDEX_op_st_i32:
- tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
+ tcg_out32(s, args[0]);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+ }
break;
OP_32_64(add):
@@ -1680,6 +1836,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond32(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond32(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
OP_32_64(bswap16):
tcg_out_rolw_8(s, args[0]);
@@ -1788,7 +1948,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
break;
case INDEX_op_st_i64:
- tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ if (const_args[0]) {
+ tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
+ 0, args[1], args[2]);
+ tcg_out32(s, args[0]);
+ } else {
+ tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+ }
break;
case INDEX_op_qemu_ld32s:
tcg_out_qemu_ld(s, args, 2 | 4);
@@ -1802,6 +1968,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_setcond64(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond64(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3]);
+ break;
case INDEX_op_bswap64_i64:
tcg_out_bswap64(s, args[0]);
@@ -1841,7 +2011,6 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
{ INDEX_op_movi_i32, { "r" } },
@@ -1850,9 +2019,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ld16u_i32, { "r", "r" } },
{ INDEX_op_ld16s_i32, { "r", "r" } },
{ INDEX_op_ld_i32, { "r", "r" } },
- { INDEX_op_st8_i32, { "q", "r" } },
- { INDEX_op_st16_i32, { "r", "r" } },
- { INDEX_op_st_i32, { "r", "r" } },
+ { INDEX_op_st8_i32, { "qi", "r" } },
+ { INDEX_op_st16_i32, { "ri", "r" } },
+ { INDEX_op_st_i32, { "ri", "r" } },
{ INDEX_op_add_i32, { "r", "r", "ri" } },
{ INDEX_op_sub_i32, { "r", "0", "ri" } },
@@ -1886,6 +2055,9 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_setcond_i32, { "q", "r", "ri" } },
{ INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+#if TCG_TARGET_HAS_movcond_i32
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
+#endif
#if TCG_TARGET_REG_BITS == 32
{ INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
@@ -1903,10 +2075,10 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ld32u_i64, { "r", "r" } },
{ INDEX_op_ld32s_i64, { "r", "r" } },
{ INDEX_op_ld_i64, { "r", "r" } },
- { INDEX_op_st8_i64, { "r", "r" } },
- { INDEX_op_st16_i64, { "r", "r" } },
- { INDEX_op_st32_i64, { "r", "r" } },
- { INDEX_op_st_i64, { "r", "r" } },
+ { INDEX_op_st8_i64, { "ri", "r" } },
+ { INDEX_op_st16_i64, { "ri", "r" } },
+ { INDEX_op_st32_i64, { "ri", "r" } },
+ { INDEX_op_st_i64, { "re", "r" } },
{ INDEX_op_add_i64, { "r", "0", "re" } },
{ INDEX_op_mul_i64, { "r", "0", "re" } },
@@ -1940,6 +2112,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_ext32u_i64, { "r", "r" } },
{ INDEX_op_deposit_i64, { "Q", "0", "Q" } },
+ { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
#endif
#if TCG_TARGET_REG_BITS == 64
@@ -2038,15 +2211,17 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#if TCG_TARGET_REG_BITS == 32
tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
(ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
- tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
- (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
+ tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ /* jmp *tb. */
+ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+ (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+ + stack_addend);
#else
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
-#endif
tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
/* jmp *tb. */
tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
/* TB epilogue */
tb_ret_addr = s->code_ptr;
@@ -2057,6 +2232,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_pop(s, tcg_target_callee_save_regs[i]);
}
tcg_out_opc(s, OPC_RET, 0, 0, 0);
+
+#if !defined(CONFIG_SOFTMMU)
+ /* Try to set up a segment register to point to GUEST_BASE. */
+ if (GUEST_BASE) {
+ setup_guest_base_seg();
+ }
+#endif
}
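
setup_guest_base_seg() and guest_base_flags are used by this file but defined
in a hunk not shown here. The idea, sketched for a Linux x86-64 host with a
helper name invented for illustration (the real implementation may differ), is
to point a spare segment register at GUEST_BASE so that user-mode guest
accesses can be emitted with a segment override instead of an explicit base
addition:

#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <asm/prctl.h>

/* Illustrative only: returns non-zero if the %gs base could be set, in which
   case the code generator may emit gs-prefixed loads/stores with offset 0. */
static int try_set_gs_base(uintptr_t base)
{
    return syscall(SYS_arch_prctl, ARCH_SET_GS, base) == 0;
}

On success the real code presumably records the matching instruction-prefix
flag in guest_base_flags, which tcg_out_qemu_ld/st_direct then pass through
as their seg argument.
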
static void tcg_target_init(TCGContext *s)
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index c3cfe05..dbc6756 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -67,7 +67,11 @@ typedef enum {
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_ESP
#define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
#define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
/* optional instructions */
#define TCG_TARGET_HAS_div2_i32 1
@@ -86,6 +90,12 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 1
+#if defined(__x86_64__) || defined(__i686__)
+/* Use cmov only if the compiler is already doing so. */
+#define TCG_TARGET_HAS_movcond_i32 1
+#else
+#define TCG_TARGET_HAS_movcond_i32 0
+#endif
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -107,6 +117,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 1
+#define TCG_TARGET_HAS_movcond_i64 1
#endif
#define TCG_TARGET_deposit_i32_valid(ofs, len) \
@@ -114,9 +125,6 @@ typedef enum {
((ofs) == 0 && (len) == 16))
#define TCG_TARGET_deposit_i64_valid TCG_TARGET_deposit_i32_valid
-#define TCG_TARGET_HAS_GUEST_BASE
-
-/* Note: must be synced with dyngen-exec.h */
#if TCG_TARGET_REG_BITS == 64
# define TCG_AREG0 TCG_REG_R14
#else
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index dc588db..06570be 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -176,12 +176,6 @@ static const int tcg_target_call_oarg_regs[] = {
TCG_REG_R8
};
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 8;
-}
-
/*
* opcode formation
*/
@@ -236,6 +230,8 @@ enum {
OPC_CMP4_LT_A6 = 0x18400000000ull,
OPC_CMP4_LTU_A6 = 0x1a400000000ull,
OPC_CMP4_EQ_A6 = 0x1c400000000ull,
+ OPC_DEP_I14 = 0x0ae00000000ull,
+ OPC_DEP_I15 = 0x08000000000ull,
OPC_DEP_Z_I12 = 0x0a600000000ull,
OPC_EXTR_I11 = 0x0a400002000ull,
OPC_EXTR_U_I11 = 0x0a400000000ull,
@@ -507,6 +503,30 @@ static inline uint64_t tcg_opc_i12(int qp, uint64_t opc, int r1,
| (qp & 0x3f);
}
+static inline uint64_t tcg_opc_i14(int qp, uint64_t opc, int r1, uint64_t imm,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((imm & 0x01) << 36)
+ | ((len & 0x3f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((pos & 0x3f) << 14)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
+static inline uint64_t tcg_opc_i15(int qp, uint64_t opc, int r1, int r2,
+ int r3, uint64_t pos, uint64_t len)
+{
+ return opc
+ | ((pos & 0x3f) << 31)
+ | ((len & 0x0f) << 27)
+ | ((r3 & 0x7f) << 20)
+ | ((r2 & 0x7f) << 13)
+ | ((r1 & 0x7f) << 6)
+ | (qp & 0x3f);
+}
+
static inline uint64_t tcg_opc_i18(int qp, uint64_t opc, uint64_t imm)
{
return opc
@@ -1318,6 +1338,37 @@ static inline void tcg_out_bswap64(TCGContext *s, TCGArg ret, TCGArg arg)
tcg_opc_i3 (TCG_REG_P0, OPC_MUX1_I3, ret, arg, 0xb));
}
+static inline void tcg_out_deposit(TCGContext *s, TCGArg ret, TCGArg a1,
+ TCGArg a2, int const_a2, int pos, int len)
+{
+ uint64_t i1 = 0, i2 = 0;
+ int cpos = 63 - pos, lm1 = len - 1;
+
+ if (const_a2) {
+ /* Truncate the value of a constant a2 to the width of the field. */
+ int mask = (1u << len) - 1;
+ a2 &= mask;
+
+ if (a2 == 0 || a2 == mask) {
+ /* 1-bit signed constant inserted into register. */
+ i2 = tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, ret, a2, a1, cpos, lm1);
+ } else {
+ /* Otherwise, load any constant into a temporary. Do this into
+ the first I slot to help out with cross-unit delays. */
+ i1 = tcg_opc_a5(TCG_REG_P0, OPC_ADDL_A5,
+ TCG_REG_R2, a2, TCG_REG_R0);
+ a2 = TCG_REG_R2;
+ }
+ }
+ if (i2 == 0) {
+ i2 = tcg_opc_i15(TCG_REG_P0, OPC_DEP_I15, ret, a2, a1, cpos, lm1);
+ }
+ tcg_out_bundle(s, (i1 ? mII : miI),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ i1 ? i1 : tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+ i2);
+}
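
The dep forms above implement TCG's deposit operation directly. As a semantic
reference (a C sketch of the intended result, not target code; deposit64 is a
name invented here), with pos/len taken from the opcode arguments:

#include <stdint.h>

/* deposit(a1, a2, pos, len): replace LEN bits of A1 starting at bit POS with
   the low LEN bits of A2; every other bit of the result comes from A1. */
static inline uint64_t deposit64(uint64_t a1, uint64_t a2, int pos, int len)
{
    uint64_t mask = (len < 64 ? (1ull << len) - 1 : ~0ull) << pos;
    return (a1 & ~mask) | ((a2 << pos) & mask);
}

The len <= 16 restriction added to tcg-target.h below presumably keeps a
truncated constant within the addl immediate range used to materialise it.
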
+
static inline uint64_t tcg_opc_cmp_a(int qp, TCGCond cond, TCGArg arg1,
TCGArg arg2, int cmp4)
{
@@ -1410,21 +1461,47 @@ static inline void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGArg ret,
tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, 0, TCG_REG_R0));
}
+static inline void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2,
+ TCGArg v1, int const_v1,
+ TCGArg v2, int const_v2, int cmp4)
+{
+ uint64_t opc1, opc2;
+
+ if (const_v1) {
+ opc1 = tcg_opc_a5(TCG_REG_P6, OPC_ADDL_A5, ret, v1, TCG_REG_R0);
+ } else if (ret == v1) {
+ opc1 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+ } else {
+ opc1 = tcg_opc_a4(TCG_REG_P6, OPC_ADDS_A4, ret, 0, v1);
+ }
+ if (const_v2) {
+ opc2 = tcg_opc_a5(TCG_REG_P7, OPC_ADDL_A5, ret, v2, TCG_REG_R0);
+ } else if (ret == v2) {
+ opc2 = tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0);
+ } else {
+ opc2 = tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, ret, 0, v2);
+ }
+
+ tcg_out_bundle(s, MmI,
+ tcg_opc_cmp_a(TCG_REG_P0, cond, c1, c2, cmp4),
+ opc1,
+ opc2);
+}
+
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
/* Load and compare a TLB entry, and return the result in (p6, p7).
R2 is loaded with the address of the addend TLB entry.
- R56 is loaded with the address, zero extented on 32-bit targets. */
+   R57 is loaded with the address, zero extended on 32-bit targets.  */
static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
int s_bits, uint64_t offset_rw,
uint64_t offset_addend)
{
tcg_out_bundle(s, mII,
- tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R3,
- TARGET_PAGE_MASK | ((1 << s_bits) - 1),
- TCG_REG_R0),
+ tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R2,
addr_reg, TARGET_PAGE_BITS, CPU_TLB_BITS - 1),
tcg_opc_i12(TCG_REG_P0, OPC_DEP_Z_I12, TCG_REG_R2,
@@ -1434,9 +1511,9 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
tcg_opc_a5 (TCG_REG_P0, OPC_ADDL_A5, TCG_REG_R2,
offset_rw, TCG_REG_R2),
#if TARGET_LONG_BITS == 32
- tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R56, addr_reg),
+ tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, TCG_REG_R57, addr_reg),
#else
- tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R56,
+ tcg_opc_a4(TCG_REG_P0, OPC_ADDS_A4, TCG_REG_R57,
0, addr_reg),
#endif
tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2,
@@ -1444,15 +1521,15 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGArg addr_reg,
tcg_out_bundle(s, mII,
tcg_opc_m3 (TCG_REG_P0,
(TARGET_LONG_BITS == 32
- ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R57,
+ ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R56,
TCG_REG_R2, offset_addend - offset_rw),
- tcg_opc_a1 (TCG_REG_P0, OPC_AND_A1, TCG_REG_R3,
- TCG_REG_R3, TCG_REG_R56),
+ tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R3, 0,
+ TCG_REG_R57, 63 - s_bits,
+ TARGET_PAGE_BITS - s_bits - 1),
tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
- TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
+ TCG_REG_P7, TCG_REG_R3, TCG_REG_R56));
}
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -1461,16 +1538,6 @@ static const void * const qemu_ld_helpers[4] = {
helper_ldl_mmu,
helper_ldq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-#endif
static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
{
@@ -1497,8 +1564,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
/* P6 is the fast path, and P7 the slow path */
tcg_out_bundle(s, mLX,
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R57,
- mem_index, TCG_REG_R0),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+ TCG_REG_R56, 0, TCG_AREG0),
tcg_opc_l2 ((tcg_target_long) qemu_ld_helpers[s_bits]),
tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
(tcg_target_long) qemu_ld_helpers[s_bits]));
@@ -1506,7 +1573,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
TCG_REG_R2, 8),
tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
- TCG_REG_R3, TCG_REG_R56),
+ TCG_REG_R3, TCG_REG_R57),
tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
TCG_REG_R3, 0));
if (bswap && s_bits == 1) {
@@ -1530,25 +1597,17 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
}
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_bundle(s, mII,
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
- mem_index, TCG_REG_R0),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R57, 0, TCG_REG_R56),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R56, 0, TCG_AREG0));
-#endif
if (!bswap || s_bits == 0) {
tcg_out_bundle(s, miB,
- tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+ mem_index, TCG_REG_R0),
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
TCG_REG_B0, TCG_REG_B6));
} else {
tcg_out_bundle(s, miB,
- tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
+ tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+ mem_index, TCG_REG_R0),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R8, TCG_REG_R8, 0xb),
tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
@@ -1570,7 +1629,6 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
}
}
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
uintxx_t val, int mmu_idx) */
static const void * const qemu_st_helpers[4] = {
@@ -1579,16 +1637,6 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
{
@@ -1611,8 +1659,8 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* P6 is the fast path, and P7 the slow path */
tcg_out_bundle(s, mLX,
- tcg_opc_a4(TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R57,
- 0, data_reg),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
+ TCG_REG_R56, 0, TCG_AREG0),
tcg_opc_l2 ((tcg_target_long) qemu_st_helpers[opc]),
tcg_opc_x2 (TCG_REG_P7, OPC_MOVL_X2, TCG_REG_R2,
(tcg_target_long) qemu_st_helpers[opc]));
@@ -1620,31 +1668,42 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_opc_m3 (TCG_REG_P0, OPC_LD8_M3, TCG_REG_R3,
TCG_REG_R2, 8),
tcg_opc_a1 (TCG_REG_P6, OPC_ADD_A1, TCG_REG_R3,
- TCG_REG_R3, TCG_REG_R56),
+ TCG_REG_R3, TCG_REG_R57),
tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
TCG_REG_R3, 0));
if (!bswap || opc == 0) {
- tcg_out_bundle(s, mII,
+ tcg_out_bundle(s, mii,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
- tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
} else if (opc == 1) {
- tcg_out_bundle(s, mII,
+ tcg_out_bundle(s, miI,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
- TCG_REG_R2, data_reg, 15, 15),
+ TCG_REG_R2, data_reg, 15, 15));
+ tcg_out_bundle(s, miI,
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R2, TCG_REG_R2, 0xb));
data_reg = TCG_REG_R2;
} else if (opc == 2) {
- tcg_out_bundle(s, mII,
+ tcg_out_bundle(s, miI,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
- TCG_REG_R2, data_reg, 31, 31),
+ TCG_REG_R2, data_reg, 31, 31));
+ tcg_out_bundle(s, miI,
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
+ tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R2, TCG_REG_R2, 0xb));
data_reg = TCG_REG_R2;
@@ -1652,37 +1711,20 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
tcg_out_bundle(s, miI,
tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
TCG_REG_R1, TCG_REG_R2),
- tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0),
+ tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4, TCG_REG_R58,
+ 0, data_reg),
tcg_opc_i3 (TCG_REG_P6, OPC_MUX1_I3,
TCG_REG_R2, data_reg, 0xb));
data_reg = TCG_REG_R2;
}
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_bundle(s, mII,
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
- mem_index, TCG_REG_R0),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R58, 0, TCG_REG_R57),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R57, 0, TCG_REG_R56));
tcg_out_bundle(s, miB,
tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
data_reg, TCG_REG_R3),
- tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
- TCG_REG_R56, 0, TCG_AREG0),
- tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
- TCG_REG_B0, TCG_REG_B6));
-#else
- tcg_out_bundle(s, miB,
- tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
- data_reg, TCG_REG_R3),
- tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
+ tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
mem_index, TCG_REG_R0),
tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
TCG_REG_B0, TCG_REG_B6));
-#endif
}
#else /* !CONFIG_SOFTMMU */
@@ -1956,9 +1998,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_goto_tb:
tcg_out_goto_tb(s, args[0]);
break;
- case INDEX_op_jmp:
- tcg_out_jmp(s, args[0]);
- break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -2135,6 +2174,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_bswap64(s, args[0], args[1]);
break;
+ case INDEX_op_deposit_i32:
+ case INDEX_op_deposit_i64:
+ tcg_out_deposit(s, args[0], args[1], args[2], const_args[2],
+ args[3], args[4]);
+ break;
+
case INDEX_op_brcond_i32:
tcg_out_brcond(s, args[2], args[0], const_args[0],
args[1], const_args[1], args[3], 1);
@@ -2149,6 +2194,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_setcond_i64:
tcg_out_setcond(s, args[3], args[0], args[1], args[2], 0);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ args[3], const_args[3], args[4], const_args[4], 1);
+ break;
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2],
+ args[3], const_args[3], args[4], const_args[4], 0);
+ break;
case INDEX_op_qemu_ld8u:
tcg_out_qemu_ld(s, args, 0);
@@ -2196,7 +2249,6 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_call, { "r" } },
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
- { INDEX_op_jmp, { "r" } },
{ INDEX_op_mov_i32, { "r", "r" } },
{ INDEX_op_movi_i32, { "r" } },
@@ -2240,6 +2292,7 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_brcond_i32, { "rI", "rI" } },
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rI", "rI" } },
{ INDEX_op_mov_i64, { "r", "r" } },
{ INDEX_op_movi_i64, { "r" } },
@@ -2289,6 +2342,10 @@ static const TCGTargetOpDef ia64_op_defs[] = {
{ INDEX_op_brcond_i64, { "rI", "rI" } },
{ INDEX_op_setcond_i64, { "r", "rZ", "rZ" } },
+ { INDEX_op_movcond_i64, { "r", "rZ", "rZ", "rI", "rI" } },
+
+ { INDEX_op_deposit_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_deposit_i64, { "r", "rZ", "ri" } },
{ INDEX_op_qemu_ld8u, { "r", "r" } },
{ INDEX_op_qemu_ld8s, { "r", "r" } },
@@ -2313,9 +2370,12 @@ static void tcg_target_qemu_prologue(TCGContext *s)
int frame_size;
/* reserve some stack space */
- frame_size = TCG_STATIC_CALL_ARGS_SIZE;
+ frame_size = TCG_STATIC_CALL_ARGS_SIZE +
+ CPU_TEMP_BUF_NLONGS * sizeof(long);
frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
+ tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
/* First emit adhoc function descriptor */
*(uint64_t *)(s->code_ptr) = (uint64_t)s->code_ptr + 16; /* entry point */
@@ -2422,6 +2482,4 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R6);
tcg_add_target_add_op_defs(ia64_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
}
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 0631b9f..91fe7a3 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -131,8 +131,13 @@ typedef enum {
#define TCG_TARGET_HAS_orc_i64 1
#define TCG_TARGET_HAS_rot_i32 1
#define TCG_TARGET_HAS_rot_i64 1
-#define TCG_TARGET_HAS_deposit_i32 0
-#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i32 1
+#define TCG_TARGET_HAS_movcond_i64 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_deposit_i64 1
+
+#define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16)
+#define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16)
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub r1, r0, r3 */
@@ -140,12 +145,8 @@ typedef enum {
#define TCG_TARGET_HAS_not_i32 0 /* xor r1, -1, r3 */
#define TCG_TARGET_HAS_not_i64 0 /* xor r1, -1, r3 */
-/* Note: must be synced with dyngen-exec.h */
#define TCG_AREG0 TCG_REG_R7
-/* Guest base is supported */
-#define TCG_TARGET_HAS_GUEST_BASE
-
static inline void flush_icache_range(tcg_target_ulong start,
tcg_target_ulong stop)
{
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 1006e28..ae2b274 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -68,7 +68,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
#endif
/* check if we really need so many registers :P */
-static const int tcg_target_reg_alloc_order[] = {
+static const TCGReg tcg_target_reg_alloc_order[] = {
TCG_REG_S0,
TCG_REG_S1,
TCG_REG_S2,
@@ -94,14 +94,14 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_V1
};
-static const int tcg_target_call_iarg_regs[4] = {
+static const TCGReg tcg_target_call_iarg_regs[4] = {
TCG_REG_A0,
TCG_REG_A1,
TCG_REG_A2,
TCG_REG_A3
};
-static const int tcg_target_call_oarg_regs[2] = {
+static const TCGReg tcg_target_call_oarg_regs[2] = {
TCG_REG_V0,
TCG_REG_V1
};
@@ -185,12 +185,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 4;
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -217,7 +211,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set(ct->u.regs, 0xffffffff);
#if defined(CONFIG_SOFTMMU)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
-# if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+# if (TARGET_LONG_BITS == 64)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
# endif
#endif
@@ -227,12 +221,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set(ct->u.regs, 0xffffffff);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
#if defined(CONFIG_SOFTMMU)
-# if (defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 32) || \
- (!defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 64)
+# if (TARGET_LONG_BITS == 32)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
# endif
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
-# if defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 64
+# if TARGET_LONG_BITS == 64
tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
# endif
#endif
@@ -279,6 +272,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
enum {
OPC_BEQ = 0x04 << 26,
OPC_BNE = 0x05 << 26,
+ OPC_BLEZ = 0x06 << 26,
+ OPC_BGTZ = 0x07 << 26,
OPC_ADDIU = 0x09 << 26,
OPC_SLTI = 0x0A << 26,
OPC_SLTIU = 0x0B << 26,
@@ -299,12 +294,16 @@ enum {
OPC_SPECIAL = 0x00 << 26,
OPC_SLL = OPC_SPECIAL | 0x00,
OPC_SRL = OPC_SPECIAL | 0x02,
+ OPC_ROTR = OPC_SPECIAL | (0x01 << 21) | 0x02,
OPC_SRA = OPC_SPECIAL | 0x03,
OPC_SLLV = OPC_SPECIAL | 0x04,
OPC_SRLV = OPC_SPECIAL | 0x06,
+ OPC_ROTRV = OPC_SPECIAL | (0x01 << 6) | 0x06,
OPC_SRAV = OPC_SPECIAL | 0x07,
OPC_JR = OPC_SPECIAL | 0x08,
OPC_JALR = OPC_SPECIAL | 0x09,
+ OPC_MOVZ = OPC_SPECIAL | 0x0A,
+ OPC_MOVN = OPC_SPECIAL | 0x0B,
OPC_MFHI = OPC_SPECIAL | 0x10,
OPC_MFLO = OPC_SPECIAL | 0x12,
OPC_MULT = OPC_SPECIAL | 0x18,
@@ -320,7 +319,16 @@ enum {
OPC_SLT = OPC_SPECIAL | 0x2A,
OPC_SLTU = OPC_SPECIAL | 0x2B,
+ OPC_REGIMM = 0x01 << 26,
+ OPC_BLTZ = OPC_REGIMM | (0x00 << 16),
+ OPC_BGEZ = OPC_REGIMM | (0x01 << 16),
+
+ OPC_SPECIAL2 = 0x1c << 26,
+ OPC_MUL = OPC_SPECIAL2 | 0x002,
+
OPC_SPECIAL3 = 0x1f << 26,
+ OPC_INS = OPC_SPECIAL3 | 0x004,
+ OPC_WSBH = OPC_SPECIAL3 | 0x0a0,
OPC_SEB = OPC_SPECIAL3 | 0x420,
OPC_SEH = OPC_SPECIAL3 | 0x620,
};
@@ -328,7 +336,8 @@ enum {
/*
* Type reg
*/
-static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int rt)
+static inline void tcg_out_opc_reg(TCGContext *s, int opc,
+ TCGReg rd, TCGReg rs, TCGReg rt)
{
int32_t inst;
@@ -342,7 +351,8 @@ static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int r
/*
* Type immediate
*/
-static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int imm)
+static inline void tcg_out_opc_imm(TCGContext *s, int opc,
+ TCGReg rt, TCGReg rs, TCGArg imm)
{
int32_t inst;
@@ -356,7 +366,8 @@ static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int i
/*
* Type branch
*/
-static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
+static inline void tcg_out_opc_br(TCGContext *s, int opc,
+ TCGReg rt, TCGReg rs)
{
/* We pay attention here to not modify the branch target by reading
the existing value and using it again. This ensure that caches and
@@ -369,7 +380,8 @@ static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
/*
* Type sa
*/
-static inline void tcg_out_opc_sa(TCGContext *s, int opc, int rd, int rt, int sa)
+static inline void tcg_out_opc_sa(TCGContext *s, int opc,
+ TCGReg rd, TCGReg rt, TCGArg sa)
{
int32_t inst;
@@ -408,38 +420,47 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
}
}
-static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
{
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+#else
/* ret and arg can't be register at */
if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
tcg_abort();
}
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff);
-
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
}
-static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
{
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
+#else
/* ret and arg can't be register at */
if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
tcg_abort();
}
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
- tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff);
-
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
}
-static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
{
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+ tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+ tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
+#else
/* ret and arg must be different and can't be register at */
if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
tcg_abort();
@@ -457,11 +478,12 @@ static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00);
tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
}
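
On an R2 core the 32-bit swap is two instructions: WSBH swaps the bytes inside
each halfword, and the rotate by 16 then swaps the halfwords. The equivalent
computation in plain C (a reference sketch only; function names invented here):

#include <stdint.h>

static inline uint32_t wsbh32(uint32_t x)      /* swap bytes within halfwords */
{
    return ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
}

static inline uint32_t bswap32_ref(uint32_t x) /* wsbh followed by rotr 16 */
{
    uint32_t t = wsbh32(x);
    return (t >> 16) | (t << 16);
}
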
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
{
-#ifdef _MIPS_ARCH_MIPS32R2
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg);
#else
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
@@ -469,9 +491,9 @@ static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
#endif
}
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
{
-#ifdef _MIPS_ARCH_MIPS32R2
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg);
#else
tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16);
@@ -479,8 +501,8 @@ static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
#endif
}
-static inline void tcg_out_ldst(TCGContext *s, int opc, int arg,
- int arg1, tcg_target_long arg2)
+static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg,
+ TCGReg arg1, TCGArg arg2)
{
if (arg2 == (int16_t) arg2) {
tcg_out_opc_imm(s, opc, arg, arg1, arg2);
@@ -503,7 +525,7 @@ static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
}
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
{
if (val == (int16_t)val) {
tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
@@ -544,7 +566,7 @@ DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_reg16, TCGReg arg)
#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
#define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \
tcg_out_movi(s, TCG_TYPE_I32, A, arg);
-DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, uint32_t arg)
+DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg)
#undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
/* We don't use the macro for this one to avoid an unnecessary reg-reg
@@ -574,8 +596,8 @@ static inline void tcg_out_call_iarg_reg64(TCGContext *s, int *arg_num,
#endif
}
-static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
- int arg2, int label_index)
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int label_index)
{
TCGLabel *l = &s->labels[label_index];
@@ -587,32 +609,48 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
tcg_out_opc_br(s, OPC_BNE, arg1, arg2);
break;
case TCG_COND_LT:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
- tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BLTZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+ tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_LTU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
break;
case TCG_COND_GE:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
- tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BGEZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_GEU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
break;
case TCG_COND_LE:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
- tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BLEZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+ tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_LEU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
break;
case TCG_COND_GT:
- tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
- tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ if (arg2 == 0) {
+ tcg_out_opc_br(s, OPC_BGTZ, 0, arg1);
+ } else {
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+ tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+ }
break;
case TCG_COND_GTU:
tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
@@ -632,8 +670,9 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
/* XXX: we implement it at the target level to avoid having to
handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
- int arg2, int arg3, int arg4, int label_index)
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, TCGArg arg3, TCGArg arg4,
+ int label_index)
{
void *label_ptr;
@@ -695,8 +734,70 @@ static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
}
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
- int arg1, int arg2)
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGArg c1, TCGArg c2, TCGArg v)
+{
+ switch (cond) {
+ case TCG_COND_EQ:
+ if (c1 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2);
+ } else if (c2 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1);
+ } else {
+ tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ }
+ break;
+ case TCG_COND_NE:
+ if (c1 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2);
+ } else if (c2 == 0) {
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1);
+ } else {
+ tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ }
+ break;
+ case TCG_COND_LT:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_LTU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GE:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GEU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_LE:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_LEU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GT:
+ tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ case TCG_COND_GTU:
+ tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+ tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+ break;
+ default:
+ tcg_abort();
+ break;
+ }
+}
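
MOVZ/MOVN only write their destination when the condition register is zero or
non-zero respectively, so the movcond output must arrive already holding v2;
that is what the "0" constraint on movcond_i32 further down enforces. A rough
C equivalent of one case, TCG_COND_LT, with the pre-loaded v2 made explicit
(sketch only, names invented here):

#include <stdint.h>

static inline int32_t movcond_lt(int32_t c1, int32_t c2,
                                 int32_t v1, int32_t v2)
{
    int32_t ret = v2;            /* output register aliases v2 ("0" constraint) */
    int32_t at = (c1 < c2);      /* slt  at, c1, c2 */
    if (at) {                    /* movn ret, v1, at */
        ret = v1;
    }
    return ret;
}
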
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGArg arg1, TCGArg arg2)
{
switch (cond) {
case TCG_COND_EQ:
@@ -755,8 +856,8 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
/* XXX: we implement it at the target level to avoid having to
handle cross basic blocks temporaries */
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
- int arg1, int arg2, int arg3, int arg4)
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+ TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4)
{
switch (cond) {
case TCG_COND_EQ:
@@ -821,7 +922,6 @@ static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -839,42 +939,22 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
#endif
static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
int opc)
{
- int addr_regl, addr_meml;
- int data_regl, data_regh, data_reg1, data_reg2;
- int mem_index, s_bits;
+ TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
#if defined(CONFIG_SOFTMMU)
void *label1_ptr, *label2_ptr;
int arg_num;
-#endif
-#if TARGET_LONG_BITS == 64
-# if defined(CONFIG_SOFTMMU)
+ int mem_index, s_bits;
+ int addr_meml;
+# if TARGET_LONG_BITS == 64
uint8_t *label3_ptr;
+ TCGReg addr_regh;
+ int addr_memh;
# endif
- int addr_regh, addr_memh;
#endif
data_regl = *args++;
if (opc == 3)
@@ -882,11 +962,22 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
else
data_regh = 0;
addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
addr_regh = *args++;
-#endif
+# if defined(TCG_TARGET_WORDS_BIGENDIAN)
+ addr_memh = 0;
+ addr_meml = 4;
+# else
+ addr_memh = 4;
+ addr_meml = 0;
+# endif
+# else
+ addr_meml = 0;
+# endif
mem_index = *args;
s_bits = opc & 3;
+#endif
if (opc == 3) {
#if defined(TCG_TARGET_WORDS_BIGENDIAN)
@@ -900,18 +991,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
data_reg1 = data_regl;
data_reg2 = 0;
}
-#if TARGET_LONG_BITS == 64
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
- addr_memh = 0;
- addr_meml = 4;
-# else
- addr_memh = 4;
- addr_meml = 0;
-# endif
-#else
- addr_meml = 0;
-#endif
-
#if defined(CONFIG_SOFTMMU)
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
@@ -942,9 +1021,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
/* slow path */
arg_num = 0;
-# ifdef CONFIG_TCG_PASS_AREG0
tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0);
-# endif
# if TARGET_LONG_BITS == 64
tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh);
# else
@@ -1052,50 +1129,56 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
int opc)
{
- int addr_regl, addr_meml;
- int data_regl, data_regh, data_reg1, data_reg2;
- int mem_index, s_bits;
+ TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
#if defined(CONFIG_SOFTMMU)
uint8_t *label1_ptr, *label2_ptr;
int arg_num;
+ int mem_index, s_bits;
+ int addr_meml;
#endif
#if TARGET_LONG_BITS == 64
# if defined(CONFIG_SOFTMMU)
uint8_t *label3_ptr;
+ TCGReg addr_regh;
+ int addr_memh;
# endif
- int addr_regh, addr_memh;
#endif
-
data_regl = *args++;
if (opc == 3) {
data_regh = *args++;
-#if defined(TCG_TARGET_WORDS_BIGENDIAN)
- data_reg1 = data_regh;
- data_reg2 = data_regl;
-#else
- data_reg1 = data_regl;
- data_reg2 = data_regh;
-#endif
} else {
- data_reg1 = data_regl;
- data_reg2 = 0;
data_regh = 0;
}
addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
addr_regh = *args++;
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
+# if defined(TCG_TARGET_WORDS_BIGENDIAN)
addr_memh = 0;
addr_meml = 4;
-# else
+# else
addr_memh = 4;
addr_meml = 0;
-# endif
-#else
+# endif
+# else
addr_meml = 0;
-#endif
+# endif
mem_index = *args;
s_bits = opc;
+#endif
+
+ if (opc == 3) {
+#if defined(TCG_TARGET_WORDS_BIGENDIAN)
+ data_reg1 = data_regh;
+ data_reg2 = data_regl;
+#else
+ data_reg1 = data_regl;
+ data_reg2 = data_regh;
+#endif
+ } else {
+ data_reg1 = data_regl;
+ data_reg2 = 0;
+ }
#if defined(CONFIG_SOFTMMU)
tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
@@ -1127,9 +1210,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
/* slow path */
arg_num = 0;
-# ifdef CONFIG_TCG_PASS_AREG0
tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0);
-# endif
# if TARGET_LONG_BITS == 64
tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh);
# else
@@ -1182,7 +1263,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
break;
case 1:
if (TCG_NEED_BSWAP) {
- tcg_out_bswap16(s, TCG_REG_T0, data_reg1);
+ tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff);
+ tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0);
tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0);
} else {
tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0);
@@ -1243,10 +1325,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, args[0], 0);
tcg_out_nop(s);
break;
- case INDEX_op_jmp:
- tcg_out_opc_reg(s, OPC_JR, 0, args[0], 0);
- tcg_out_nop(s);
- break;
case INDEX_op_br:
tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]);
break;
@@ -1328,8 +1406,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT);
break;
case INDEX_op_mul_i32:
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1)
+ tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]);
+#else
tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]);
tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0);
+#endif
break;
case INDEX_op_mulu2_i32:
tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]);
@@ -1402,6 +1484,31 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]);
}
break;
+ case INDEX_op_rotl_i32:
+ if (const_args[2]) {
+ tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32);
+ tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]);
+ tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]);
+ }
+ break;
+ case INDEX_op_rotr_i32:
+ if (const_args[2]) {
+ tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]);
+ } else {
+ tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]);
+ }
+ break;
+
+    /* The bswap routines do not work on non-R2 CPUs. In that case
+       we let TCG generate the corresponding code. */
+ case INDEX_op_bswap16_i32:
+ tcg_out_bswap16(s, args[0], args[1]);
+ break;
+ case INDEX_op_bswap32_i32:
+ tcg_out_bswap32(s, args[0], args[1]);
+ break;
case INDEX_op_ext8s_i32:
tcg_out_ext8s(s, args[0], args[1]);
@@ -1410,6 +1517,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_ext16s(s, args[0], args[1]);
break;
+ case INDEX_op_deposit_i32:
+ tcg_out_opc_imm(s, OPC_INS, args[0], args[2],
+ ((args[3] + args[4] - 1) << 11) | (args[3] << 6));
+ break;
+
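
The INS immediate packs the destination field's msb (ofs + len - 1) into bits
15..11 and its lsb (ofs) into bits 10..6 of the instruction word, which is what
the shifted expression above builds. Spelled out (illustrative helper only):

/* Build the immediate used with OPC_INS for a deposit at (ofs, len). */
static inline int mips_ins_imm(int ofs, int len)
{
    int msb = ofs + len - 1;
    int lsb = ofs;
    return (msb << 11) | (lsb << 6);
}
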
case INDEX_op_brcond_i32:
tcg_out_brcond(s, args[2], args[0], args[1], args[3]);
break;
@@ -1417,6 +1529,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]);
+ break;
+
case INDEX_op_setcond_i32:
tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
break;
@@ -1464,7 +1580,6 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "C" } },
- { INDEX_op_jmp, { "r" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
@@ -1478,34 +1593,42 @@ static const TCGTargetOpDef mips_op_defs[] = {
{ INDEX_op_st16_i32, { "rZ", "r" } },
{ INDEX_op_st_i32, { "rZ", "r" } },
- { INDEX_op_add_i32, { "r", "rZ", "rJZ" } },
+ { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
{ INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
{ INDEX_op_div_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_divu_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_rem_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_remu_i32, { "r", "rZ", "rZ" } },
- { INDEX_op_sub_i32, { "r", "rZ", "rJZ" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
- { INDEX_op_and_i32, { "r", "rZ", "rIZ" } },
+ { INDEX_op_and_i32, { "r", "rZ", "rI" } },
{ INDEX_op_nor_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_not_i32, { "r", "rZ" } },
{ INDEX_op_or_i32, { "r", "rZ", "rIZ" } },
{ INDEX_op_xor_i32, { "r", "rZ", "rIZ" } },
- { INDEX_op_shl_i32, { "r", "rZ", "riZ" } },
- { INDEX_op_shr_i32, { "r", "rZ", "riZ" } },
- { INDEX_op_sar_i32, { "r", "rZ", "riZ" } },
+ { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+ { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
+
+ { INDEX_op_bswap16_i32, { "r", "r" } },
+ { INDEX_op_bswap32_i32, { "r", "r" } },
{ INDEX_op_ext8s_i32, { "r", "rZ" } },
{ INDEX_op_ext16s_i32, { "r", "rZ" } },
+ { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
{ INDEX_op_brcond_i32, { "rZ", "rZ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
{ INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
{ INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
- { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
- { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
{ INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
#if TARGET_LONG_BITS == 32
@@ -1545,7 +1668,6 @@ static int tcg_target_callee_save_regs[] = {
TCG_REG_S5,
TCG_REG_S6,
TCG_REG_S7,
- TCG_REG_GP,
TCG_REG_FP,
TCG_REG_RA, /* should be last for ABI compliance */
};
@@ -1555,11 +1677,15 @@ static void tcg_target_qemu_prologue(TCGContext *s)
{
int i, frame_size;
- /* reserve some stack space */
+ /* reserve some stack space, also for TCG temps. */
frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
- + TCG_STATIC_CALL_ARGS_SIZE;
+ + TCG_STATIC_CALL_ARGS_SIZE
+ + CPU_TEMP_BUF_NLONGS * sizeof(long);
frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
~(TCG_TARGET_STACK_ALIGN - 1);
+ tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+ + TCG_STATIC_CALL_ARGS_SIZE,
+ CPU_TEMP_BUF_NLONGS * sizeof(long));
/* TB prologue */
tcg_out_addi(s, TCG_REG_SP, -frame_size);
@@ -1611,8 +1737,7 @@ static void tcg_target_init(TCGContext *s)
tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0); /* internal use */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */
tcg_add_target_add_op_defs(mips_op_defs);
- tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
- CPU_TEMP_BUF_NLONGS * sizeof(long));
}
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d3c804d..65b5c59 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -80,28 +80,42 @@ typedef enum {
#define TCG_TARGET_HAS_div_i32 1
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_nor_i32 1
-#define TCG_TARGET_HAS_rot_i32 0
#define TCG_TARGET_HAS_ext8s_i32 1
#define TCG_TARGET_HAS_ext16s_i32 1
-#define TCG_TARGET_HAS_bswap32_i32 0
-#define TCG_TARGET_HAS_bswap16_i32 0
#define TCG_TARGET_HAS_andc_i32 0
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
+
+/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+ defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+ defined(_MIPS_ARCH_MIPS4)
+#define TCG_TARGET_HAS_movcond_i32 1
+#else
+#define TCG_TARGET_HAS_movcond_i32 0
+#endif
+
+/* optional instructions only implemented on MIPS32R2 */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define TCG_TARGET_HAS_bswap16_i32 1
+#define TCG_TARGET_HAS_bswap32_i32 1
+#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_deposit_i32 1
+#else
+#define TCG_TARGET_HAS_bswap16_i32 0
+#define TCG_TARGET_HAS_bswap32_i32 0
+#define TCG_TARGET_HAS_rot_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#endif
/* optional instructions automatically implemented */
#define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
#define TCG_TARGET_HAS_ext8u_i32 0 /* andi rt, rs, 0xff */
#define TCG_TARGET_HAS_ext16u_i32 0 /* andi rt, rs, 0xffff */
-/* Note: must be synced with dyngen-exec.h */
#define TCG_AREG0 TCG_REG_S0
-/* guest base is supported */
-#define TCG_TARGET_HAS_GUEST_BASE
-
#ifdef __OpenBSD__
#include <machine/sysarch.h>
#else
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9c65474..9109b81 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,8 +39,6 @@ typedef enum {
TCG_TEMP_UNDEF = 0,
TCG_TEMP_CONST,
TCG_TEMP_COPY,
- TCG_TEMP_HAS_COPY,
- TCG_TEMP_ANY
} tcg_temp_state;
struct tcg_temp_info {
@@ -52,39 +50,19 @@ struct tcg_temp_info {
static struct tcg_temp_info temps[TCG_MAX_TEMPS];
-/* Reset TEMP's state to TCG_TEMP_ANY. If TEMP was a representative of some
- class of equivalent temp's, a new representative should be chosen in this
- class. */
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
+/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
+ the copy flag from the left temp. */
+static void reset_temp(TCGArg temp)
{
- int i;
- TCGArg new_base = (TCGArg)-1;
- if (temps[temp].state == TCG_TEMP_HAS_COPY) {
- for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
- if (i >= nb_globals) {
- temps[i].state = TCG_TEMP_HAS_COPY;
- new_base = i;
- break;
- }
+ if (temps[temp].state == TCG_TEMP_COPY) {
+ if (temps[temp].prev_copy == temps[temp].next_copy) {
+ temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+ } else {
+ temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+ temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
}
- for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
- if (new_base == (TCGArg)-1) {
- temps[i].state = TCG_TEMP_ANY;
- } else {
- temps[i].val = new_base;
- }
- }
- temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
- temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
- } else if (temps[temp].state == TCG_TEMP_COPY) {
- temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
- temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
- new_base = temps[temp].val;
- }
- temps[temp].state = TCG_TEMP_ANY;
- if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
- temps[new_base].state = TCG_TEMP_ANY;
}
+ temps[temp].state = TCG_TEMP_UNDEF;
}
static int op_bits(TCGOpcode op)
@@ -107,36 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
}
}
-static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
- TCGArg src, int nb_temps, int nb_globals)
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
+{
+ TCGArg i;
+
+ /* If this is already a global, we can't do better. */
+ if (temp < s->nb_globals) {
+ return temp;
+ }
+
+ /* Search for a global first. */
+ for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+ if (i < s->nb_globals) {
+ return i;
+ }
+ }
+
+ /* If it is a temp, search for a temp local. */
+ if (!s->temps[temp].temp_local) {
+ for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+ if (s->temps[i].temp_local) {
+ return i;
+ }
+ }
+ }
+
+ /* Failure to find a better representation, return the same temp. */
+ return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+ TCGArg i;
+
+ if (arg1 == arg2) {
+ return true;
+ }
+
+ if (temps[arg1].state != TCG_TEMP_COPY
+ || temps[arg2].state != TCG_TEMP_COPY) {
+ return false;
+ }
+
+ for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+ if (i == arg2) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
+ TCGArg dst, TCGArg src)
{
- reset_temp(dst, nb_temps, nb_globals);
- assert(temps[src].state != TCG_TEMP_COPY);
- /* Don't try to copy if one of temps is a global or either one
- is local and another is register */
- if (src >= nb_globals && dst >= nb_globals &&
- tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
- assert(temps[src].state != TCG_TEMP_CONST);
- if (temps[src].state != TCG_TEMP_HAS_COPY) {
- temps[src].state = TCG_TEMP_HAS_COPY;
+ reset_temp(dst);
+ assert(temps[src].state != TCG_TEMP_CONST);
+
+ if (s->temps[src].type == s->temps[dst].type) {
+ if (temps[src].state != TCG_TEMP_COPY) {
+ temps[src].state = TCG_TEMP_COPY;
temps[src].next_copy = src;
temps[src].prev_copy = src;
}
temps[dst].state = TCG_TEMP_COPY;
- temps[dst].val = src;
temps[dst].next_copy = temps[src].next_copy;
temps[dst].prev_copy = src;
temps[temps[dst].next_copy].prev_copy = dst;
temps[src].next_copy = dst;
}
+
gen_args[0] = dst;
gen_args[1] = src;
}
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
- int nb_temps, int nb_globals)
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
{
- reset_temp(dst, nb_temps, nb_globals);
+ reset_temp(dst);
temps[dst].state = TCG_TEMP_CONST;
temps[dst].val = val;
gen_args[0] = dst;
@@ -267,6 +292,179 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
return res;
}
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int32_t)x < (int32_t)y;
+ case TCG_COND_GE:
+ return (int32_t)x >= (int32_t)y;
+ case TCG_COND_LE:
+ return (int32_t)x <= (int32_t)y;
+ case TCG_COND_GT:
+ return (int32_t)x > (int32_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_EQ:
+ return x == y;
+ case TCG_COND_NE:
+ return x != y;
+ case TCG_COND_LT:
+ return (int64_t)x < (int64_t)y;
+ case TCG_COND_GE:
+ return (int64_t)x >= (int64_t)y;
+ case TCG_COND_LE:
+ return (int64_t)x <= (int64_t)y;
+ case TCG_COND_GT:
+ return (int64_t)x > (int64_t)y;
+ case TCG_COND_LTU:
+ return x < y;
+ case TCG_COND_GEU:
+ return x >= y;
+ case TCG_COND_LEU:
+ return x <= y;
+ case TCG_COND_GTU:
+ return x > y;
+ default:
+ tcg_abort();
+ }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_GT:
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ case TCG_COND_GTU:
+ case TCG_COND_NE:
+ return 0;
+ case TCG_COND_GE:
+ case TCG_COND_GEU:
+ case TCG_COND_LE:
+ case TCG_COND_LEU:
+ case TCG_COND_EQ:
+ return 1;
+ default:
+ tcg_abort();
+ }
+}
+
+/* Return 2 if the condition can't be simplified, and the result
+ of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+ TCGArg y, TCGCond c)
+{
+ if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+ switch (op_bits(op)) {
+ case 32:
+ return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
+ case 64:
+ return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
+ default:
+ tcg_abort();
+ }
+ } else if (temps_are_copies(x, y)) {
+ return do_constant_folding_cond_eq(c);
+ } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ return 2;
+ }
+ } else {
+ return 2;
+ }
+}
+
+/* Return 2 if the condition can't be simplified, and the result
+ of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+ TCGArg al = p1[0], ah = p1[1];
+ TCGArg bl = p2[0], bh = p2[1];
+
+ if (temps[bl].state == TCG_TEMP_CONST
+ && temps[bh].state == TCG_TEMP_CONST) {
+ uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
+
+ if (temps[al].state == TCG_TEMP_CONST
+ && temps[ah].state == TCG_TEMP_CONST) {
+ uint64_t a;
+ a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
+ return do_constant_folding_cond_64(a, b, c);
+ }
+ if (b == 0) {
+ switch (c) {
+ case TCG_COND_LTU:
+ return 0;
+ case TCG_COND_GEU:
+ return 1;
+ default:
+ break;
+ }
+ }
+ }
+ if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+ return do_constant_folding_cond_eq(c);
+ }
+ return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+ TCGArg a1 = *p1, a2 = *p2;
+ int sum = 0;
+ sum += temps[a1].state == TCG_TEMP_CONST;
+ sum -= temps[a2].state == TCG_TEMP_CONST;
+
+ /* Prefer the constant in second argument, and then the form
+ op a, a, b, which is better handled on non-RISC hosts. */
+ if (sum > 0 || (sum == 0 && dest == a2)) {
+ *p1 = a2;
+ *p2 = a1;
+ return true;
+ }
+ return false;
+}
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+ int sum = 0;
+ sum += temps[p1[0]].state == TCG_TEMP_CONST;
+ sum += temps[p1[1]].state == TCG_TEMP_CONST;
+ sum -= temps[p2[0]].state == TCG_TEMP_CONST;
+ sum -= temps[p2[1]].state == TCG_TEMP_CONST;
+ if (sum > 0) {
+ TCGArg t;
+ t = p1[0], p1[0] = p2[0], p2[0] = t;
+ t = p1[1], p1[1] = p2[1], p2[1] = t;
+ return true;
+ }
+ return false;
+}
+
/* Propagate constants and copies, fold constant expressions. */
static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -276,28 +474,34 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
const TCGOpDef *def;
TCGArg *gen_args;
TCGArg tmp;
+
/* Array VALS has an element for each temp.
If this temp holds a constant then its value is kept in VALS' element.
- If this temp is a copy of other ones then this equivalence class'
- representative is kept in VALS' element.
- If this temp is neither copy nor constant then corresponding VALS'
- element is unused. */
+ If this temp is a copy of other ones then the other copies are
+ available through the doubly linked circular list. */
nb_temps = s->nb_temps;
nb_globals = s->nb_globals;
memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- nb_ops = tcg_opc_ptr - gen_opc_buf;
+ nb_ops = tcg_opc_ptr - s->gen_opc_buf;
gen_args = args;
for (op_index = 0; op_index < nb_ops; op_index++) {
- op = gen_opc_buf[op_index];
+ op = s->gen_opc_buf[op_index];
def = &tcg_op_defs[op];
/* Do copy propagation */
- if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
- assert(op != INDEX_op_call);
+ if (op == INDEX_op_call) {
+ int nb_oargs = args[0] >> 16;
+ int nb_iargs = args[0] & 0xffff;
+ for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
+ if (temps[args[i]].state == TCG_TEMP_COPY) {
+ args[i] = find_better_copy(s, args[i]);
+ }
+ }
+ } else {
for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
if (temps[args[i]].state == TCG_TEMP_COPY) {
- args[i] = temps[args[i]].val;
+ args[i] = find_better_copy(s, args[i]);
}
}
}
@@ -312,17 +516,71 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(eqv):
CASE_OP_32_64(nand):
CASE_OP_32_64(nor):
- if (temps[args[1]].state == TCG_TEMP_CONST) {
- tmp = args[1];
- args[1] = args[2];
- args[2] = tmp;
+ swap_commutative(args[0], &args[1], &args[2]);
+ break;
+ CASE_OP_32_64(brcond):
+ if (swap_commutative(-1, &args[0], &args[1])) {
+ args[2] = tcg_swap_cond(args[2]);
+ }
+ break;
+ CASE_OP_32_64(setcond):
+ if (swap_commutative(args[0], &args[1], &args[2])) {
+ args[3] = tcg_swap_cond(args[3]);
+ }
+ break;
+ CASE_OP_32_64(movcond):
+ if (swap_commutative(-1, &args[1], &args[2])) {
+ args[5] = tcg_swap_cond(args[5]);
+ }
+ /* For movcond, we canonicalize the "false" input reg to match
+ the destination reg so that the tcg backend can implement
+ a "move if true" operation. */
+ if (swap_commutative(args[0], &args[4], &args[3])) {
+ args[5] = tcg_invert_cond(args[5]);
+ }
+ break;
+ case INDEX_op_add2_i32:
+ swap_commutative(args[0], &args[2], &args[4]);
+ swap_commutative(args[1], &args[3], &args[5]);
+ break;
+ case INDEX_op_mulu2_i32:
+ swap_commutative(args[0], &args[2], &args[3]);
+ break;
+ case INDEX_op_brcond2_i32:
+ if (swap_commutative2(&args[0], &args[2])) {
+ args[4] = tcg_swap_cond(args[4]);
+ }
+ break;
+ case INDEX_op_setcond2_i32:
+ if (swap_commutative2(&args[1], &args[3])) {
+ args[5] = tcg_swap_cond(args[5]);
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
+ switch (op) {
+ CASE_OP_32_64(shl):
+ CASE_OP_32_64(shr):
+ CASE_OP_32_64(sar):
+ CASE_OP_32_64(rotl):
+ CASE_OP_32_64(rotr):
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[1]].val == 0) {
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
+ args += 3;
+ gen_args += 2;
+ continue;
}
break;
default:
break;
}
- /* Simplify expression if possible. */
+ /* Simplify expression for "op r, a, 0 => mov r, a" cases */
switch (op) {
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
@@ -331,50 +589,75 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(sar):
CASE_OP_32_64(rotl):
CASE_OP_32_64(rotr):
+ CASE_OP_32_64(or):
+ CASE_OP_32_64(xor):
if (temps[args[1]].state == TCG_TEMP_CONST) {
/* Proceed with possible constant folding. */
break;
}
if (temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0) {
- if ((temps[args[0]].state == TCG_TEMP_COPY
- && temps[args[0]].val == args[1])
- || args[0] == args[1]) {
- args += 3;
- gen_opc_buf[op_index] = INDEX_op_nop;
+ if (temps_are_copies(args[0], args[1])) {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
} else {
- gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(s, gen_args, args[0], args[1],
- nb_temps, nb_globals);
+ s->gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
- args += 3;
}
+ args += 3;
continue;
}
break;
+ default:
+ break;
+ }
+
+ /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
+ switch (op) {
+ CASE_OP_32_64(and):
CASE_OP_32_64(mul):
if ((temps[args[2]].state == TCG_TEMP_CONST
&& temps[args[2]].val == 0)) {
- gen_opc_buf[op_index] = op_to_movi(op);
- tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
args += 3;
gen_args += 2;
continue;
}
break;
+ default:
+ break;
+ }
+
+ /* Simplify expression for "op r, a, a => mov r, a" cases */
+ switch (op) {
CASE_OP_32_64(or):
CASE_OP_32_64(and):
- if (args[1] == args[2]) {
- if (args[1] == args[0]) {
- args += 3;
- gen_opc_buf[op_index] = INDEX_op_nop;
+ if (temps_are_copies(args[1], args[2])) {
+ if (temps_are_copies(args[0], args[1])) {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
} else {
- gen_opc_buf[op_index] = op_to_mov(op);
- tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
- nb_globals);
+ s->gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
- args += 3;
}
+ args += 3;
+ continue;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* Simplify expression for "op r, a, a => movi r, 0" cases */
+ switch (op) {
+ CASE_OP_32_64(sub):
+ CASE_OP_32_64(xor):
+ if (temps_are_copies(args[1], args[2])) {
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], 0);
+ gen_args += 2;
+ args += 3;
continue;
}
break;
@@ -387,16 +670,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
allocator where needed and possible. Also detect copies. */
switch (op) {
CASE_OP_32_64(mov):
- if ((temps[args[1]].state == TCG_TEMP_COPY
- && temps[args[1]].val == args[0])
- || args[0] == args[1]) {
+ if (temps_are_copies(args[0], args[1])) {
args += 2;
- gen_opc_buf[op_index] = INDEX_op_nop;
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
break;
}
if (temps[args[1]].state != TCG_TEMP_CONST) {
- tcg_opt_gen_mov(s, gen_args, args[0], args[1],
- nb_temps, nb_globals);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
gen_args += 2;
args += 2;
break;
@@ -404,14 +684,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
/* Source argument is constant. Rewrite the operation and
let movi case handle it. */
op = op_to_movi(op);
- gen_opc_buf[op_index] = op;
+ s->gen_opc_buf[op_index] = op;
args[1] = temps[args[1]].val;
/* fallthrough */
CASE_OP_32_64(movi):
- tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], args[1]);
gen_args += 2;
args += 2;
break;
+
CASE_OP_32_64(not):
CASE_OP_32_64(neg):
CASE_OP_32_64(ext8s):
@@ -421,20 +702,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
case INDEX_op_ext32s_i64:
case INDEX_op_ext32u_i64:
if (temps[args[1]].state == TCG_TEMP_CONST) {
- gen_opc_buf[op_index] = op_to_movi(op);
+ s->gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val, 0);
- tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
- gen_args += 2;
- args += 2;
- break;
- } else {
- reset_temp(args[0], nb_temps, nb_globals);
- gen_args[0] = args[0];
- gen_args[1] = args[1];
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
args += 2;
break;
}
+ goto do_default;
+
CASE_OP_32_64(add):
CASE_OP_32_64(sub):
CASE_OP_32_64(mul):
@@ -453,31 +729,200 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
CASE_OP_32_64(nor):
if (temps[args[1]].state == TCG_TEMP_CONST
&& temps[args[2]].state == TCG_TEMP_CONST) {
- gen_opc_buf[op_index] = op_to_movi(op);
+ s->gen_opc_buf[op_index] = op_to_movi(op);
tmp = do_constant_folding(op, temps[args[1]].val,
temps[args[2]].val);
- tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
gen_args += 2;
args += 3;
break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(deposit):
+ if (temps[args[1]].state == TCG_TEMP_CONST
+ && temps[args[2]].state == TCG_TEMP_CONST) {
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tmp = ((1ull << args[4]) - 1);
+ tmp = (temps[args[1]].val & ~(tmp << args[3]))
+ | ((temps[args[2]].val & tmp) << args[3]);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
+ gen_args += 2;
+ args += 5;
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(setcond):
+ tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+ if (tmp != 2) {
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
+ gen_args += 2;
+ args += 4;
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(brcond):
+ tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+ if (tmp != 2) {
+ if (tmp) {
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ s->gen_opc_buf[op_index] = INDEX_op_br;
+ gen_args[0] = args[3];
+ gen_args += 1;
+ } else {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
+ }
+ args += 4;
+ break;
+ }
+ goto do_default;
+
+ CASE_OP_32_64(movcond):
+ tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+ if (tmp != 2) {
+ if (temps_are_copies(args[0], args[4-tmp])) {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
+ } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
+ s->gen_opc_buf[op_index] = op_to_movi(op);
+ tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
+ gen_args += 2;
+ } else {
+ s->gen_opc_buf[op_index] = op_to_mov(op);
+ tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
+ gen_args += 2;
+ }
+ args += 6;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[4]].state == TCG_TEMP_CONST
+ && temps[args[5]].state == TCG_TEMP_CONST) {
+ uint32_t al = temps[args[2]].val;
+ uint32_t ah = temps[args[3]].val;
+ uint32_t bl = temps[args[4]].val;
+ uint32_t bh = temps[args[5]].val;
+ uint64_t a = ((uint64_t)ah << 32) | al;
+ uint64_t b = ((uint64_t)bh << 32) | bl;
+ TCGArg rl, rh;
+
+ if (op == INDEX_op_add2_i32) {
+ a += b;
+ } else {
+ a -= b;
+ }
+
+ /* We emit the extra nop when we emit the add2/sub2. */
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+ rl = args[0];
+ rh = args[1];
+ s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+ s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+ tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+ tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+ gen_args += 4;
+ args += 6;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_mulu2_i32:
+ if (temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST) {
+ uint32_t a = temps[args[2]].val;
+ uint32_t b = temps[args[3]].val;
+ uint64_t r = (uint64_t)a * b;
+ TCGArg rl, rh;
+
+ /* We emit the extra nop when we emit the mulu2. */
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+ rl = args[0];
+ rh = args[1];
+ s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+ s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+ tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+ tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+ gen_args += 4;
+ args += 4;
+ break;
+ }
+ goto do_default;
+
+ case INDEX_op_brcond2_i32:
+ tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+ if (tmp != 2) {
+ if (tmp) {
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ s->gen_opc_buf[op_index] = INDEX_op_br;
+ gen_args[0] = args[5];
+ gen_args += 1;
+ } else {
+ s->gen_opc_buf[op_index] = INDEX_op_nop;
+ }
+ } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+ && temps[args[2]].state == TCG_TEMP_CONST
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[2]].val == 0
+ && temps[args[3]].val == 0) {
+ /* Simplify LT/GE comparisons vs zero to a single compare
+ vs the high word of the input. */
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+ gen_args[0] = args[1];
+ gen_args[1] = args[3];
+ gen_args[2] = args[4];
+ gen_args[3] = args[5];
+ gen_args += 4;
} else {
- reset_temp(args[0], nb_temps, nb_globals);
+ goto do_default;
+ }
+ args += 6;
+ break;
+
+ case INDEX_op_setcond2_i32:
+ tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+ if (tmp != 2) {
+ s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
+ tcg_opt_gen_movi(gen_args, args[0], tmp);
+ gen_args += 2;
+ } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+ && temps[args[3]].state == TCG_TEMP_CONST
+ && temps[args[4]].state == TCG_TEMP_CONST
+ && temps[args[3]].val == 0
+ && temps[args[4]].val == 0) {
+ /* Simplify LT/GE comparisons vs zero to a single compare
+ vs the high word of the input. */
+ s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
gen_args[0] = args[0];
- gen_args[1] = args[1];
- gen_args[2] = args[2];
- gen_args += 3;
- args += 3;
- break;
+ gen_args[1] = args[2];
+ gen_args[2] = args[4];
+ gen_args[3] = args[5];
+ gen_args += 4;
+ } else {
+ goto do_default;
}
+ args += 6;
+ break;
+
case INDEX_op_call:
nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
- if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
+ if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS |
+ TCG_CALL_NO_WRITE_GLOBALS))) {
for (i = 0; i < nb_globals; i++) {
- reset_temp(i, nb_temps, nb_globals);
+ reset_temp(i);
}
}
for (i = 0; i < (args[0] >> 16); i++) {
- reset_temp(args[i + 1], nb_temps, nb_globals);
+ reset_temp(args[i + 1]);
}
i = nb_call_args + 3;
while (i) {
@@ -487,22 +932,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
i--;
}
break;
- case INDEX_op_set_label:
- case INDEX_op_jmp:
- case INDEX_op_br:
- CASE_OP_32_64(brcond):
- memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
- for (i = 0; i < def->nb_args; i++) {
- *gen_args = *args;
- args++;
- gen_args++;
- }
- break;
+
default:
- /* Default case: we do know nothing about operation so no
- propagation is done. We only trash output args. */
- for (i = 0; i < def->nb_oargs; i++) {
- reset_temp(args[i], nb_temps, nb_globals);
+ do_default:
+ /* Default case: we know nothing about operation (or were unable
+ to compute the operation result) so no propagation is done.
+ We trash everything if the operation is the end of a basic
+ block, otherwise we only trash the output args. */
+ if (def->flags & TCG_OPF_BB_END) {
+ memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+ } else {
+ for (i = 0; i < def->nb_oargs; i++) {
+ reset_temp(args[i]);
+ }
}
for (i = 0; i < def->nb_args; i++) {
gen_args[i] = args[i];
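
To make the new condition folding in optimize.c concrete: when both operands of a setcond_i32 are known constants, the whole op collapses to a movi of 0 or 1, with signedness decided by the condition. The following is a minimal stand-alone sketch (not part of the patch) mirroring do_constant_folding_cond_32(); fold_cond32() and the reduced condition set are illustrative only:

/* Sketch: 32-bit condition folding, signed vs unsigned compares. */
#include <stdint.h>
#include <stdio.h>

typedef enum { COND_EQ, COND_NE, COND_LT, COND_GE, COND_LTU, COND_GEU } Cond;

static int fold_cond32(uint32_t x, uint32_t y, Cond c)
{
    switch (c) {
    case COND_EQ:  return x == y;
    case COND_NE:  return x != y;
    case COND_LT:  return (int32_t)x < (int32_t)y;    /* signed */
    case COND_GE:  return (int32_t)x >= (int32_t)y;
    case COND_LTU: return x < y;                      /* unsigned */
    case COND_GEU: return x >= y;
    default:       return 0;
    }
}

int main(void)
{
    /* setcond_i32 d, -1, 1, lt   folds to   movi_i32 d, 1 */
    printf("lt  (signed)  : %d\n", fold_cond32((uint32_t)-1, 1, COND_LT));
    /* setcond_i32 d, -1, 1, ltu  folds to   movi_i32 d, 0 */
    printf("ltu (unsigned): %d\n", fold_cond32((uint32_t)-1, 1, COND_LTU));
    return 0;
}
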
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 0cff181..d72d396 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -39,8 +39,6 @@ static uint8_t *tb_ret_addr;
#define LR_OFFSET 4
#endif
-#define FAST_PATH
-
#ifndef GUEST_BASE
#define GUEST_BASE 0
#endif
@@ -221,12 +219,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -248,7 +240,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
#if TARGET_LONG_BITS == 64
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
@@ -256,11 +247,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
#endif
#endif
-#else /* !AREG0 */
-#if TARGET_LONG_BITS == 64
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-#endif
-#endif
break;
case 'K': /* qemu_st[8..32] constraint */
ct->ct |= TCG_CT_REG;
@@ -268,7 +254,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
#if TARGET_LONG_BITS == 64
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
@@ -276,11 +261,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8);
#endif
#endif
-#else /* !AREG0 */
-#if TARGET_LONG_BITS == 64
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
-#endif
-#endif
break;
case 'M': /* qemu_st64 constraint */
ct->ct |= TCG_CT_REG;
@@ -290,12 +270,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
-#if defined(CONFIG_TCG_PASS_AREG0)
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8);
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
tcg_regset_reset_reg(ct->u.regs, TCG_REG_R9);
#endif
-#endif
break;
#else
case 'L':
@@ -404,6 +382,7 @@ static int tcg_target_const_match(tcg_target_long val,
#define ORC XO31(412)
#define EQV XO31(284)
#define NAND XO31(476)
+#define ISEL XO31( 15)
#define LBZX XO31( 87)
#define LHZX XO31(279)
@@ -456,7 +435,7 @@ enum {
CR_SO
};
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
[TCG_COND_EQ] = BC | BI (7, CR_EQ) | BO_COND_TRUE,
[TCG_COND_NE] = BC | BI (7, CR_EQ) | BO_COND_FALSE,
[TCG_COND_LT] = BC | BI (7, CR_LT) | BO_COND_TRUE,
@@ -539,9 +518,39 @@ static void tcg_out_call (TCGContext *s, tcg_target_long arg, int const_arg)
#if defined(CONFIG_SOFTMMU)
+static void add_qemu_ldst_label (TCGContext *s,
+ int is_ld,
+ int opc,
+ int data_reg,
+ int data_reg2,
+ int addrlo_reg,
+ int addrhi_reg,
+ int mem_index,
+ uint8_t *raddr,
+ uint8_t *label_ptr)
+{
+ int idx;
+ TCGLabelQemuLdst *label;
+
+ if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) {
+ tcg_abort();
+ }
+
+ idx = s->nb_qemu_ldst_labels++;
+ label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
+ label->is_ld = is_ld;
+ label->opc = opc;
+ label->datalo_reg = data_reg;
+ label->datahi_reg = data_reg2;
+ label->addrlo_reg = addrlo_reg;
+ label->addrhi_reg = addrhi_reg;
+ label->mem_index = mem_index;
+ label->raddr = raddr;
+ label->label_ptr[0] = label_ptr;
+}
+
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -559,55 +568,15 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
-#endif
+static void *ld_trampolines[4];
+static void *st_trampolines[4];
-static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_tlb_check (TCGContext *s, int r0, int r1, int r2,
+ int addr_reg, int addr_reg2, int s_bits,
+ int offset1, int offset2, uint8_t **label_ptr)
{
- int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap;
-#ifdef CONFIG_SOFTMMU
- int mem_index, s_bits, r2, ir;
- void *label1_ptr, *label2_ptr;
-#if TARGET_LONG_BITS == 64
- int addr_reg2;
-#endif
-#endif
-
- data_reg = *args++;
- if (opc == 3)
- data_reg2 = *args++;
- else
- data_reg2 = 0;
- addr_reg = *args++;
-
-#ifdef CONFIG_SOFTMMU
-#if TARGET_LONG_BITS == 64
- addr_reg2 = *args++;
-#endif
- mem_index = *args;
- s_bits = opc & 3;
- r0 = 3;
- r1 = 4;
- r2 = 0;
- rbase = 0;
+ uint16_t retranst;
tcg_out32 (s, (RLWINM
| RA (r0)
@@ -621,7 +590,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
tcg_out32 (s, (LWZU
| RT (r1)
| RA (r0)
- | offsetof (CPUArchState, tlb_table[mem_index][0].addr_read)
+ | offset1
)
);
tcg_out32 (s, (RLWINM
@@ -639,79 +608,58 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
#endif
+ *label_ptr = s->code_ptr;
+ retranst = ((uint16_t *) s->code_ptr)[1] & ~3;
+ tcg_out32 (s, BC | BI (7, CR_EQ) | retranst | BO_COND_FALSE);
- label1_ptr = s->code_ptr;
-#ifdef FAST_PATH
- tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
-#endif
-
- /* slow path */
- ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
-#endif
-#if TARGET_LONG_BITS == 32
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
-#else
-#ifdef TCG_TARGET_CALL_ALIGN_ARGS
- ir |= 1;
-#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg2);
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
-#endif
- tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
-
- tcg_out_call (s, (tcg_target_long) qemu_ld_helpers[s_bits], 1);
- switch (opc) {
- case 0|4:
- tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
- break;
- case 1|4:
- tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
- break;
- case 0:
- case 1:
- case 2:
- if (data_reg != 3)
- tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
- break;
- case 3:
- if (data_reg == 3) {
- if (data_reg2 == 4) {
- tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
- tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
- tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
- }
- else {
- tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
- tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
- }
- }
- else {
- if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
- if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
- }
- break;
- }
- label2_ptr = s->code_ptr;
- tcg_out32 (s, B);
-
- /* label1: fast path */
-#ifdef FAST_PATH
- reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
-#endif
-
- /* r0 now contains &env->tlb_table[mem_index][index].addr_read */
+ /* r0 now contains &env->tlb_table[mem_index][index].addr_x */
tcg_out32 (s, (LWZ
| RT (r0)
| RA (r0)
- | (offsetof (CPUTLBEntry, addend)
- - offsetof (CPUTLBEntry, addr_read))
- ));
+ | offset2
+ )
+ );
/* r0 = env->tlb_table[mem_index][index].addend */
tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
/* r0 = env->tlb_table[mem_index][index].addend + addr */
+}
+#endif
+
+static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
+{
+ int addr_reg, data_reg, data_reg2, r0, r1, rbase, bswap;
+#ifdef CONFIG_SOFTMMU
+ int mem_index, s_bits, r2, addr_reg2;
+ uint8_t *label_ptr;
+#endif
+
+ data_reg = *args++;
+ if (opc == 3)
+ data_reg2 = *args++;
+ else
+ data_reg2 = 0;
+ addr_reg = *args++;
+
+#ifdef CONFIG_SOFTMMU
+#if TARGET_LONG_BITS == 64
+ addr_reg2 = *args++;
+#else
+ addr_reg2 = 0;
+#endif
+ mem_index = *args;
+ s_bits = opc & 3;
+ r0 = 3;
+ r1 = 4;
+ r2 = 0;
+ rbase = 0;
+
+ tcg_out_tlb_check (
+ s, r0, r1, r2, addr_reg, addr_reg2, s_bits,
+ offsetof (CPUArchState, tlb_table[mem_index][0].addr_read),
+ offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_read),
+ &label_ptr
+ );
#else /* !CONFIG_SOFTMMU */
r0 = addr_reg;
r1 = 3;
@@ -777,9 +725,17 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
}
break;
}
-
#ifdef CONFIG_SOFTMMU
- reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
+ add_qemu_ldst_label (s,
+ 1,
+ opc,
+ data_reg,
+ data_reg2,
+ addr_reg,
+ addr_reg2,
+ mem_index,
+ s->code_ptr,
+ label_ptr);
#endif
}
@@ -787,11 +743,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
{
int addr_reg, r0, r1, data_reg, data_reg2, bswap, rbase;
#ifdef CONFIG_SOFTMMU
- int mem_index, r2, ir;
- void *label1_ptr, *label2_ptr;
-#if TARGET_LONG_BITS == 64
- int addr_reg2;
-#endif
+ int mem_index, r2, addr_reg2;
+ uint8_t *label_ptr;
#endif
data_reg = *args++;
@@ -804,6 +757,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
#ifdef CONFIG_SOFTMMU
#if TARGET_LONG_BITS == 64
addr_reg2 = *args++;
+#else
+ addr_reg2 = 0;
#endif
mem_index = *args;
r0 = 3;
@@ -811,54 +766,162 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
r2 = 0;
rbase = 0;
- tcg_out32 (s, (RLWINM
- | RA (r0)
- | RS (addr_reg)
- | SH (32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
- | MB (32 - (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS))
- | ME (31 - CPU_TLB_ENTRY_BITS)
- )
- );
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (TCG_AREG0));
- tcg_out32 (s, (LWZU
- | RT (r1)
- | RA (r0)
- | offsetof (CPUArchState, tlb_table[mem_index][0].addr_write)
- )
- );
- tcg_out32 (s, (RLWINM
- | RA (r2)
- | RS (addr_reg)
- | SH (0)
- | MB ((32 - opc) & 31)
- | ME (31 - TARGET_PAGE_BITS)
- )
+ tcg_out_tlb_check (
+ s, r0, r1, r2, addr_reg, addr_reg2, opc & 3,
+ offsetof (CPUArchState, tlb_table[mem_index][0].addr_write),
+ offsetof (CPUTLBEntry, addend) - offsetof (CPUTLBEntry, addr_write),
+ &label_ptr
);
+#else /* !CONFIG_SOFTMMU */
+ r0 = addr_reg;
+ r1 = 3;
+ rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+#endif
- tcg_out32 (s, CMP | (7 << 23) | RA (r2) | RB (r1));
-#if TARGET_LONG_BITS == 64
- tcg_out32 (s, LWZ | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, CMP | BF (6) | RA (addr_reg2) | RB (r1));
- tcg_out32 (s, CRAND | BT (7, CR_EQ) | BA (6, CR_EQ) | BB (7, CR_EQ));
+#ifdef TARGET_WORDS_BIGENDIAN
+ bswap = 0;
+#else
+ bswap = 1;
+#endif
+ switch (opc) {
+ case 0:
+ tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
+ break;
+ case 1:
+ if (bswap)
+ tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
+ else
+ tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
+ break;
+ case 2:
+ if (bswap)
+ tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
+ else
+ tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
+ break;
+ case 3:
+ if (bswap) {
+ tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
+ tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
+ tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1));
+ }
+ else {
+#ifdef CONFIG_USE_GUEST_BASE
+ tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0));
+ tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
+ tcg_out32 (s, STWX | SAB (data_reg, rbase, r1));
+#else
+ tcg_out32 (s, STW | RS (data_reg2) | RA (r0));
+ tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4);
#endif
+ }
+ break;
+ }
- label1_ptr = s->code_ptr;
-#ifdef FAST_PATH
- tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
+#ifdef CONFIG_SOFTMMU
+ add_qemu_ldst_label (s,
+ 0,
+ opc,
+ data_reg,
+ data_reg2,
+ addr_reg,
+ addr_reg2,
+ mem_index,
+ s->code_ptr,
+ label_ptr);
#endif
+}
+
+#if defined(CONFIG_SOFTMMU)
+static void tcg_out_qemu_ld_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
+{
+ int s_bits;
+ int ir;
+ int opc = label->opc;
+ int mem_index = label->mem_index;
+ int data_reg = label->datalo_reg;
+ int data_reg2 = label->datahi_reg;
+ int addr_reg = label->addrlo_reg;
+ uint8_t *raddr = label->raddr;
+ uint8_t **label_ptr = &label->label_ptr[0];
+
+ s_bits = opc & 3;
+
+ /* resolve label address */
+ reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
/* slow path */
- ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
+ ir = 4;
+#if TARGET_LONG_BITS == 32
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
+#else
+#ifdef TCG_TARGET_CALL_ALIGN_ARGS
+ ir |= 1;
+#endif
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
+ tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
+ tcg_out_call (s, (tcg_target_long) ld_trampolines[s_bits], 1);
+ tcg_out32 (s, (tcg_target_long) raddr);
+ switch (opc) {
+ case 0|4:
+ tcg_out32 (s, EXTSB | RA (data_reg) | RS (3));
+ break;
+ case 1|4:
+ tcg_out32 (s, EXTSH | RA (data_reg) | RS (3));
+ break;
+ case 0:
+ case 1:
+ case 2:
+ if (data_reg != 3)
+ tcg_out_mov (s, TCG_TYPE_I32, data_reg, 3);
+ break;
+ case 3:
+ if (data_reg == 3) {
+ if (data_reg2 == 4) {
+ tcg_out_mov (s, TCG_TYPE_I32, 0, 4);
+ tcg_out_mov (s, TCG_TYPE_I32, 4, 3);
+ tcg_out_mov (s, TCG_TYPE_I32, 3, 0);
+ }
+ else {
+ tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
+ tcg_out_mov (s, TCG_TYPE_I32, 3, 4);
+ }
+ }
+ else {
+ if (data_reg != 4) tcg_out_mov (s, TCG_TYPE_I32, data_reg, 4);
+ if (data_reg2 != 3) tcg_out_mov (s, TCG_TYPE_I32, data_reg2, 3);
+ }
+ break;
+ }
+    /* Jump to the code corresponding to the next IR of qemu_ld */
+ tcg_out_b (s, 0, (tcg_target_long) raddr);
+}
+
+static void tcg_out_qemu_st_slow_path (TCGContext *s, TCGLabelQemuLdst *label)
+{
+ int ir;
+ int opc = label->opc;
+ int mem_index = label->mem_index;
+ int data_reg = label->datalo_reg;
+ int data_reg2 = label->datahi_reg;
+ int addr_reg = label->addrlo_reg;
+ uint8_t *raddr = label->raddr;
+ uint8_t **label_ptr = &label->label_ptr[0];
+
+ /* resolve label address */
+ reloc_pc14 (label_ptr[0], (tcg_target_long) s->code_ptr);
+
+ /* slow path */
+ ir = 4;
#if TARGET_LONG_BITS == 32
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#else
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
ir |= 1;
#endif
- tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg2);
+ tcg_out_mov (s, TCG_TYPE_I32, ir++, label->addrhi_reg);
tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
#endif
@@ -893,75 +956,39 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
ir++;
tcg_out_movi (s, TCG_TYPE_I32, ir, mem_index);
- tcg_out_call (s, (tcg_target_long) qemu_st_helpers[opc], 1);
- label2_ptr = s->code_ptr;
- tcg_out32 (s, B);
-
- /* label1: fast path */
-#ifdef FAST_PATH
- reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
-#endif
-
- tcg_out32 (s, (LWZ
- | RT (r0)
- | RA (r0)
- | (offsetof (CPUTLBEntry, addend)
- - offsetof (CPUTLBEntry, addr_write))
- ));
- /* r0 = env->tlb_table[mem_index][index].addend */
- tcg_out32 (s, ADD | RT (r0) | RA (r0) | RB (addr_reg));
- /* r0 = env->tlb_table[mem_index][index].addend + addr */
-
-#else /* !CONFIG_SOFTMMU */
- r0 = addr_reg;
- r1 = 3;
- rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
-#endif
+ tcg_out_call (s, (tcg_target_long) st_trampolines[opc], 1);
+ tcg_out32 (s, (tcg_target_long) raddr);
+ tcg_out_b (s, 0, (tcg_target_long) raddr);
+}
-#ifdef TARGET_WORDS_BIGENDIAN
- bswap = 0;
-#else
- bswap = 1;
-#endif
- switch (opc) {
- case 0:
- tcg_out32 (s, STBX | SAB (data_reg, rbase, r0));
- break;
- case 1:
- if (bswap)
- tcg_out32 (s, STHBRX | SAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, STHX | SAB (data_reg, rbase, r0));
- break;
- case 2:
- if (bswap)
- tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- else
- tcg_out32 (s, STWX | SAB (data_reg, rbase, r0));
- break;
- case 3:
- if (bswap) {
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, STWBRX | SAB (data_reg, rbase, r0));
- tcg_out32 (s, STWBRX | SAB (data_reg2, rbase, r1));
+void tcg_out_tb_finalize(TCGContext *s)
+{
+ int i;
+ TCGLabelQemuLdst *label;
+
+ /* qemu_ld/st slow paths */
+ for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
+ label = (TCGLabelQemuLdst *) &s->qemu_ldst_labels[i];
+ if (label->is_ld) {
+ tcg_out_qemu_ld_slow_path (s, label);
}
else {
-#ifdef CONFIG_USE_GUEST_BASE
- tcg_out32 (s, STWX | SAB (data_reg2, rbase, r0));
- tcg_out32 (s, ADDI | RT (r1) | RA (r0) | 4);
- tcg_out32 (s, STWX | SAB (data_reg, rbase, r1));
-#else
- tcg_out32 (s, STW | RS (data_reg2) | RA (r0));
- tcg_out32 (s, STW | RS (data_reg) | RA (r0) | 4);
-#endif
+ tcg_out_qemu_st_slow_path (s, label);
}
- break;
}
+}
+#endif
#ifdef CONFIG_SOFTMMU
- reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
-#endif
+static void emit_ldst_trampoline (TCGContext *s, const void *ptr)
+{
+ tcg_out32 (s, MFSPR | RT (3) | LR);
+ tcg_out32 (s, ADDI | RT (3) | RA (3) | 4);
+ tcg_out32 (s, MTSPR | RS (3) | LR);
+ tcg_out_mov (s, TCG_TYPE_I32, 3, TCG_AREG0);
+ tcg_out_b (s, 0, (tcg_target_long) ptr);
}
+#endif
static void tcg_target_qemu_prologue (TCGContext *s)
{
@@ -1023,6 +1050,16 @@ static void tcg_target_qemu_prologue (TCGContext *s)
tcg_out32 (s, MTSPR | RS (0) | LR);
tcg_out32 (s, ADDI | RT (1) | RA (1) | frame_size);
tcg_out32 (s, BCLR | BO_ALWAYS);
+
+#ifdef CONFIG_SOFTMMU
+ for (i = 0; i < 4; ++i) {
+ ld_trampolines[i] = s->code_ptr;
+ emit_ldst_trampoline (s, qemu_ld_helpers[i]);
+
+ st_trampolines[i] = s->code_ptr;
+ emit_ldst_trampoline (s, qemu_st_helpers[i]);
+ }
+#endif
}
static void tcg_out_ld (TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
@@ -1307,6 +1344,72 @@ static void tcg_out_setcond2 (TCGContext *s, const TCGArg *args,
);
}
+static void tcg_out_movcond (TCGContext *s, TCGCond cond,
+ TCGArg dest,
+ TCGArg c1, TCGArg c2,
+ TCGArg v1, TCGArg v2,
+ int const_c2)
+{
+ tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
+
+ if (1) {
+    /* At least here on 7447A bit twiddling hacks are outperformed
+ by jumpy code (the testing was not scientific) */
+ if (dest == v2) {
+ cond = tcg_invert_cond (cond);
+ v2 = v1;
+ }
+ else {
+ if (dest != v1) {
+ tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
+ }
+ }
+ /* Branch forward over one insn */
+ tcg_out32 (s, tcg_to_bc[cond] | 8);
+ tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
+ }
+ else {
+ /* isel version, "if (1)" above should be replaced once a way
+ to figure out availability of isel on the underlying
+ hardware is found */
+ int tab, bc;
+
+ switch (cond) {
+ case TCG_COND_EQ:
+ tab = TAB (dest, v1, v2);
+ bc = CR_EQ;
+ break;
+ case TCG_COND_NE:
+ tab = TAB (dest, v2, v1);
+ bc = CR_EQ;
+ break;
+ case TCG_COND_LTU:
+ case TCG_COND_LT:
+ tab = TAB (dest, v1, v2);
+ bc = CR_LT;
+ break;
+ case TCG_COND_GEU:
+ case TCG_COND_GE:
+ tab = TAB (dest, v2, v1);
+ bc = CR_LT;
+ break;
+ case TCG_COND_LEU:
+ case TCG_COND_LE:
+ tab = TAB (dest, v2, v1);
+ bc = CR_GT;
+ break;
+ case TCG_COND_GTU:
+ case TCG_COND_GT:
+ tab = TAB (dest, v1, v2);
+ bc = CR_GT;
+ break;
+ default:
+ tcg_abort ();
+ }
+ tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
+ }
+}
+
static void tcg_out_brcond (TCGContext *s, TCGCond cond,
TCGArg arg1, TCGArg arg2, int const_arg2,
int label_index)
@@ -1394,15 +1497,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_call:
tcg_out_call (s, args[0], const_args[0]);
break;
- case INDEX_op_jmp:
- if (const_args[0]) {
- tcg_out_b (s, 0, args[0]);
- }
- else {
- tcg_out32 (s, MTSPR | RS (args[0]) | CTR);
- tcg_out32 (s, BCCTR | BO_ALWAYS);
- }
- break;
case INDEX_op_movi_i32:
tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
break;
@@ -1864,6 +1958,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond (s, args[5], args[0],
+ args[1], args[2],
+ args[3], args[4],
+ const_args[2]);
+ break;
+
default:
tcg_dump_ops (s);
tcg_abort ();
@@ -1874,7 +1975,6 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
@@ -1960,6 +2060,7 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_ext16u_i32, { "r", "r" } },
{ INDEX_op_deposit_i32, { "r", "0", "r" } },
+ { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
{ -1 },
};
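
The new tcg_out_movcond() above lowers movcond_i32 without isel by moving the "true" value into dest and then branching over a single "move if false" instruction. A rough value-level C model of the non-aliased case (not part of the patch; cond_true stands for the comparison result produced by tcg_out_cmp(), and all names are hypothetical):

/* Sketch: branch-over-move lowering of movcond.  When dest aliases v2
   the real code instead inverts the condition and conditionally moves
   v1, so dest already holds the "false" value. */
#include <stdint.h>
#include <stdio.h>

static uint32_t movcond_model(int cond_true, uint32_t v1, uint32_t v2)
{
    uint32_t dest = v1;     /* unconditional "mov dest, v1" */
    if (!cond_true) {       /* the bc insn jumps over the next move */
        dest = v2;          /* "mov dest, v2" */
    }
    return dest;            /* == cond_true ? v1 : v2 */
}

int main(void)
{
    printf("%u %u\n", movcond_model(1, 10, 20),   /* prints 10 */
                      movcond_model(0, 10, 20));  /* prints 20 */
    return 0;
}
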
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 2f37fd2..ad433ae 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,11 +92,10 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 1
#define TCG_TARGET_HAS_nor_i32 1
#define TCG_TARGET_HAS_deposit_i32 1
+#define TCG_TARGET_HAS_movcond_i32 1
#define TCG_AREG0 TCG_REG_R27
-#define TCG_TARGET_HAS_GUEST_BASE
-
#define tcg_qemu_tb_exec(env, tb_ptr) \
((long __attribute__ ((longcall)) \
(*)(void *, void *))code_gen_prologue)(env, tb_ptr)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 27a0ae8..5403fc1 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -208,12 +208,6 @@ static void patch_reloc (uint8_t *code_ptr, int type,
}
}
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count (int flags)
-{
- return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
/* parse target specific constraints */
static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
{
@@ -235,10 +229,8 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
#ifdef CONFIG_SOFTMMU
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
#endif
-#endif
break;
case 'S': /* qemu_st constraint */
ct->ct |= TCG_CT_REG;
@@ -247,10 +239,8 @@ static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
#ifdef CONFIG_SOFTMMU
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6);
#endif
-#endif
break;
case 'Z':
ct->ct |= TCG_CT_CONST_U32;
@@ -428,7 +418,7 @@ enum {
CR_SO
};
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
[TCG_COND_EQ] = BC | BI (7, CR_EQ) | BO_COND_TRUE,
[TCG_COND_NE] = BC | BI (7, CR_EQ) | BO_COND_FALSE,
[TCG_COND_LT] = BC | BI (7, CR_LT) | BO_COND_TRUE,
@@ -558,7 +548,6 @@ static void tcg_out_ldsta (TCGContext *s, int ret, int addr,
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -576,25 +565,6 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
int addr_reg, int s_bits, int offset)
@@ -676,9 +646,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
/* slow path */
ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-#endif
tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
@@ -827,9 +795,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
/* slow path */
ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-#endif
tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
@@ -1279,15 +1245,6 @@ static void tcg_out_op (TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_call:
tcg_out_call (s, args[0], const_args[0]);
break;
- case INDEX_op_jmp:
- if (const_args[0]) {
- tcg_out_b (s, 0, args[0]);
- }
- else {
- tcg_out32 (s, MTSPR | RS (args[0]) | CTR);
- tcg_out32 (s, BCCTR | BO_ALWAYS);
- }
- break;
case INDEX_op_movi_i32:
tcg_out_movi (s, TCG_TYPE_I32, args[0], args[1]);
break;
@@ -1622,7 +1579,6 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 97eec08..97fc5c9 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -83,6 +83,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 0
#define TCG_TARGET_HAS_div_i64 1
#define TCG_TARGET_HAS_rot_i64 0
@@ -103,8 +104,8 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#define TCG_AREG0 TCG_REG_R27
-#define TCG_TARGET_HAS_GUEST_BASE
#define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 04662c1..fd9286f 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -268,7 +268,7 @@ static const int tcg_target_call_oarg_regs[] = {
#define S390_CC_ALWAYS 15
/* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
-static const uint8_t tcg_cond_to_s390_cond[10] = {
+static const uint8_t tcg_cond_to_s390_cond[] = {
[TCG_COND_EQ] = S390_CC_EQ,
[TCG_COND_NE] = S390_CC_NE,
[TCG_COND_LT] = S390_CC_LT,
@@ -284,7 +284,7 @@ static const uint8_t tcg_cond_to_s390_cond[10] = {
/* Condition codes that result from a LOAD AND TEST. Here, we have no
unsigned instruction variation, however since the test is vs zero we
can re-map the outcomes appropriately. */
-static const uint8_t tcg_cond_to_ltr_cond[10] = {
+static const uint8_t tcg_cond_to_ltr_cond[] = {
[TCG_COND_EQ] = S390_CC_EQ,
[TCG_COND_NE] = S390_CC_NE,
[TCG_COND_LT] = S390_CC_LT,
@@ -301,7 +301,6 @@ static const uint8_t tcg_cond_to_ltr_cond[10] = {
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -319,25 +318,6 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static void *qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static void *qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
#endif
static uint8_t *tb_ret_addr;
@@ -376,11 +356,6 @@ static void patch_reloc(uint8_t *code_ptr, int type,
}
}
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return sizeof(tcg_target_call_iarg_regs) / sizeof(int);
-}
-
/* parse target specific constraints */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
{
@@ -1138,7 +1113,7 @@ static void tgen64_xori(TCGContext *s, TCGReg dest, tcg_target_ulong val)
static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
TCGArg c2, int c2const)
{
- bool is_unsigned = (c > TCG_COND_GT);
+ bool is_unsigned = is_unsigned_cond(c);
if (c2const) {
if (c2 == 0) {
if (type == TCG_TYPE_I32) {
@@ -1507,29 +1482,25 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, TCGReg data_reg,
tcg_abort();
}
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
/* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
+ tcg_target_call_iarg_regs[2]);
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
TCG_AREG0);
-#endif
tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
} else {
tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
/* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
+ tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
TCG_AREG0);
-#endif
tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
/* sign extension */
@@ -2066,11 +2037,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
#endif /* TCG_TARGET_REG_BITS == 64 */
- case INDEX_op_jmp:
- /* This one is obsolete and never emitted. */
- tcg_abort();
- break;
-
default:
fprintf(stderr,"unimplemented opc 0x%x\n",opc);
tcg_abort();
@@ -2081,7 +2047,6 @@ static const TCGTargetOpDef s390_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index d12f90b..a0181ae 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,6 +63,7 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div2_i64 1
@@ -84,10 +85,9 @@ typedef enum TCGReg {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#endif
-#define TCG_TARGET_HAS_GUEST_BASE
-
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_R15
#define TCG_TARGET_STACK_ALIGN 8
@@ -96,7 +96,6 @@ typedef enum TCGReg {
#define TCG_TARGET_EXTEND_ARGS 1
enum {
- /* Note: must be synced with dyngen-exec.h */
TCG_AREG0 = TCG_REG_R10,
};
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 247a278..f146647 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,10 +59,14 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
};
#endif
-#ifdef CONFIG_TCG_PASS_AREG0
-#define ARG_OFFSET 1
+/* Define some temporary registers. T2 is used for constant generation. */
+#define TCG_REG_T1 TCG_REG_G1
+#define TCG_REG_T2 TCG_REG_O7
+
+#ifdef CONFIG_USE_GUEST_BASE
+# define TCG_GUEST_BASE_REG TCG_REG_I5
#else
-#define ARG_OFFSET 0
+# define TCG_GUEST_BASE_REG TCG_REG_G0
#endif
static const int tcg_target_reg_alloc_order[] = {
@@ -74,11 +78,25 @@ static const int tcg_target_reg_alloc_order[] = {
TCG_REG_L5,
TCG_REG_L6,
TCG_REG_L7,
+
TCG_REG_I0,
TCG_REG_I1,
TCG_REG_I2,
TCG_REG_I3,
TCG_REG_I4,
+ TCG_REG_I5,
+
+ TCG_REG_G2,
+ TCG_REG_G3,
+ TCG_REG_G4,
+ TCG_REG_G5,
+
+ TCG_REG_O0,
+ TCG_REG_O1,
+ TCG_REG_O2,
+ TCG_REG_O3,
+ TCG_REG_O4,
+ TCG_REG_O5,
};
static const int tcg_target_call_iarg_regs[6] = {
@@ -97,105 +115,6 @@ static const int tcg_target_call_oarg_regs[] = {
TCG_REG_O3,
};
-static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-{
- return (val << ((sizeof(tcg_target_long) * 8 - bits))
- >> (sizeof(tcg_target_long) * 8 - bits)) == val;
-}
-
-static inline int check_fit_i32(uint32_t val, unsigned int bits)
-{
- return ((val << (32 - bits)) >> (32 - bits)) == val;
-}
-
-static void patch_reloc(uint8_t *code_ptr, int type,
- tcg_target_long value, tcg_target_long addend)
-{
- value += addend;
- switch (type) {
- case R_SPARC_32:
- if (value != (uint32_t)value)
- tcg_abort();
- *(uint32_t *)code_ptr = value;
- break;
- case R_SPARC_WDISP22:
- value -= (long)code_ptr;
- value >>= 2;
- if (!check_fit_tl(value, 22))
- tcg_abort();
- *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x3fffff) | value;
- break;
- case R_SPARC_WDISP19:
- value -= (long)code_ptr;
- value >>= 2;
- if (!check_fit_tl(value, 19))
- tcg_abort();
- *(uint32_t *)code_ptr = ((*(uint32_t *)code_ptr) & ~0x7ffff) | value;
- break;
- default:
- tcg_abort();
- }
-}
-
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return 6;
-}
-
-/* parse target specific constraints */
-static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
-{
- const char *ct_str;
-
- ct_str = *pct_str;
- switch (ct_str[0]) {
- case 'r':
- ct->ct |= TCG_CT_REG;
- tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
- break;
- case 'L': /* qemu_ld/st constraint */
- ct->ct |= TCG_CT_REG;
- tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
- // Helper args
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
-#endif
- break;
- case 'I':
- ct->ct |= TCG_CT_CONST_S11;
- break;
- case 'J':
- ct->ct |= TCG_CT_CONST_S13;
- break;
- default:
- return -1;
- }
- ct_str++;
- *pct_str = ct_str;
- return 0;
-}
-
-/* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
- const TCGArgConstraint *arg_ct)
-{
- int ct;
-
- ct = arg_ct->ct;
- if (ct & TCG_CT_CONST)
- return 1;
- else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11))
- return 1;
- else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13))
- return 1;
- else
- return 0;
-}
-
#define INSN_OP(x) ((x) << 30)
#define INSN_OP2(x) ((x) << 22)
#define INSN_OP3(x) ((x) << 19)
@@ -205,12 +124,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define INSN_RS2(x) (x)
#define INSN_ASI(x) ((x) << 5)
+#define INSN_IMM10(x) ((1 << 13) | ((x) & 0x3ff))
#define INSN_IMM11(x) ((1 << 13) | ((x) & 0x7ff))
#define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
+#define INSN_OFF16(x) ((((x) >> 2) & 0x3fff) | ((((x) >> 16) & 3) << 20))
#define INSN_OFF19(x) (((x) >> 2) & 0x07ffff)
-#define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
+#define INSN_COND(x) ((x) << 25)
-#define INSN_COND(x, a) (((x) << 25) | ((a) << 29))
#define COND_N 0x0
#define COND_E 0x1
#define COND_LE 0x2
@@ -227,11 +147,26 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define COND_CC 0xd
#define COND_POS 0xe
#define COND_VC 0xf
-#define BA (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2))
+#define BA (INSN_OP(0) | INSN_COND(COND_A) | INSN_OP2(0x2))
+
+#define RCOND_Z 1
+#define RCOND_LEZ 2
+#define RCOND_LZ 3
+#define RCOND_NZ 5
+#define RCOND_GZ 6
+#define RCOND_GEZ 7
#define MOVCC_ICC (1 << 18)
#define MOVCC_XCC (1 << 18 | 1 << 12)
+#define BPCC_ICC 0
+#define BPCC_XCC (2 << 20)
+#define BPCC_PT (1 << 19)
+#define BPCC_PN 0
+#define BPCC_A (1 << 29)
+
+#define BPR_PT BPCC_PT
+
#define ARITH_ADD (INSN_OP(2) | INSN_OP3(0x00))
#define ARITH_ADDCC (INSN_OP(2) | INSN_OP3(0x10))
#define ARITH_AND (INSN_OP(2) | INSN_OP3(0x01))
@@ -242,7 +177,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define ARITH_XOR (INSN_OP(2) | INSN_OP3(0x03))
#define ARITH_SUB (INSN_OP(2) | INSN_OP3(0x04))
#define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
-#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x10))
+#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08))
#define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c))
#define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
#define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
@@ -251,6 +186,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define ARITH_UDIVX (INSN_OP(2) | INSN_OP3(0x0d))
#define ARITH_SDIVX (INSN_OP(2) | INSN_OP3(0x2d))
#define ARITH_MOVCC (INSN_OP(2) | INSN_OP3(0x2c))
+#define ARITH_MOVR (INSN_OP(2) | INSN_OP3(0x2f))
#define SHIFT_SLL (INSN_OP(2) | INSN_OP3(0x25))
#define SHIFT_SRL (INSN_OP(2) | INSN_OP3(0x26))
@@ -294,6 +230,119 @@ static inline int tcg_target_const_match(tcg_target_long val,
#define ASI_PRIMARY_LITTLE 0x88
#endif
+#define LDUH_LE (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSH_LE (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDUW_LE (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSW_LE (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDX_LE (LDXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#define STH_LE (STHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STW_LE (STWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STX_LE (STXA | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
+{
+ return (val << ((sizeof(tcg_target_long) * 8 - bits))
+ >> (sizeof(tcg_target_long) * 8 - bits)) == val;
+}
+
+static inline int check_fit_i32(uint32_t val, unsigned int bits)
+{
+ return ((val << (32 - bits)) >> (32 - bits)) == val;
+}
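
(Reading aid, not part of this patch.) The shift pair above tests whether a value survives truncation to a signed N-bit field, i.e. whether it can be encoded as a sign-extended immediate such as SPARC's simm13. A minimal standalone equivalent written as an explicit range check, valid for bits < BITS_PER_LONG:

    #include <assert.h>

    /* Same predicate as check_fit_tl for bits < BITS_PER_LONG. */
    static int fits_signed(long val, unsigned bits)
    {
        long lo = -(1L << (bits - 1));
        long hi = (1L << (bits - 1)) - 1;
        return val >= lo && val <= hi;
    }

    int main(void)
    {
        assert(fits_signed(4095, 13));     /* largest simm13 */
        assert(!fits_signed(4096, 13));
        assert(fits_signed(-4096, 13));    /* smallest simm13 */
        assert(!fits_signed(-4097, 13));
        return 0;
    }
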
+
+static void patch_reloc(uint8_t *code_ptr, int type,
+ tcg_target_long value, tcg_target_long addend)
+{
+ uint32_t insn;
+ value += addend;
+ switch (type) {
+ case R_SPARC_32:
+ if (value != (uint32_t)value) {
+ tcg_abort();
+ }
+ *(uint32_t *)code_ptr = value;
+ break;
+ case R_SPARC_WDISP16:
+ value -= (long)code_ptr;
+ if (!check_fit_tl(value >> 2, 16)) {
+ tcg_abort();
+ }
+ insn = *(uint32_t *)code_ptr;
+ insn &= ~INSN_OFF16(-1);
+ insn |= INSN_OFF16(value);
+ *(uint32_t *)code_ptr = insn;
+ break;
+ case R_SPARC_WDISP19:
+ value -= (long)code_ptr;
+ if (!check_fit_tl(value >> 2, 19)) {
+ tcg_abort();
+ }
+ insn = *(uint32_t *)code_ptr;
+ insn &= ~INSN_OFF19(-1);
+ insn |= INSN_OFF19(value);
+ *(uint32_t *)code_ptr = insn;
+ break;
+ default:
+ tcg_abort();
+ }
+}
+
+/* parse target specific constraints */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
+{
+ const char *ct_str;
+
+ ct_str = *pct_str;
+ switch (ct_str[0]) {
+ case 'r':
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ break;
+ case 'L': /* qemu_ld/st constraint */
+ ct->ct |= TCG_CT_REG;
+ tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
+ // Helper args
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
+ tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
+ break;
+ case 'I':
+ ct->ct |= TCG_CT_CONST_S11;
+ break;
+ case 'J':
+ ct->ct |= TCG_CT_CONST_S13;
+ break;
+ case 'Z':
+ ct->ct |= TCG_CT_CONST_ZERO;
+ break;
+ default:
+ return -1;
+ }
+ ct_str++;
+ *pct_str = ct_str;
+ return 0;
+}
+
+/* test if a constant matches the constraint */
+static inline int tcg_target_const_match(tcg_target_long val,
+ const TCGArgConstraint *arg_ct)
+{
+ int ct = arg_ct->ct;
+
+ if (ct & TCG_CT_CONST) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S11) && check_fit_tl(val, 11)) {
+ return 1;
+ } else if ((ct & TCG_CT_CONST_S13) && check_fit_tl(val, 13)) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
int op)
{
@@ -318,7 +367,9 @@ static void tcg_out_arithc(TCGContext *s, int rd, int rs1,
static inline void tcg_out_mov(TCGContext *s, TCGType type,
TCGReg ret, TCGReg arg)
{
- tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ if (ret != arg) {
+ tcg_out_arith(s, ret, arg, TCG_REG_G0, ARITH_OR);
+ }
}
static inline void tcg_out_sethi(TCGContext *s, int ret, uint32_t arg)
@@ -359,71 +410,50 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type,
tcg_out_sethi(s, ret, ~arg);
tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
} else {
- tcg_out_movi_imm32(s, TCG_REG_I4, arg >> (TCG_TARGET_REG_BITS / 2));
- tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
- tcg_out_movi_imm32(s, ret, arg);
- tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
+ tcg_out_movi_imm32(s, ret, arg >> (TCG_TARGET_REG_BITS / 2));
+ tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+ tcg_out_movi_imm32(s, TCG_REG_T2, arg);
+ tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
}
}
-static inline void tcg_out_ld_raw(TCGContext *s, int ret,
- tcg_target_long arg)
-{
- tcg_out_sethi(s, ret, arg);
- tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
- INSN_IMM13(arg & 0x3ff));
-}
-
-static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
- tcg_target_long arg)
+static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
+ int a2, int op)
{
- if (!check_fit_tl(arg, 10))
- tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
- if (TCG_TARGET_REG_BITS == 64) {
- tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
- INSN_IMM13(arg & 0x3ff));
- } else {
- tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
- INSN_IMM13(arg & 0x3ff));
- }
+ tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
}
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
+static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
+ int offset, int op)
{
- if (check_fit_tl(offset, 13))
+ if (check_fit_tl(offset, 13)) {
tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
INSN_IMM13(offset));
- else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
- tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
- INSN_RS2(addr));
+ } else {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
+ tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
}
}
-static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
- int offset, int op, int asi)
-{
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
- tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
- INSN_ASI(asi) | INSN_RS2(addr));
-}
-
static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
TCGReg arg1, tcg_target_long arg2)
{
- if (type == TCG_TYPE_I32)
- tcg_out_ldst(s, ret, arg1, arg2, LDUW);
- else
- tcg_out_ldst(s, ret, arg1, arg2, LDX);
+ tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
}
static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
TCGReg arg1, tcg_target_long arg2)
{
- if (type == TCG_TYPE_I32)
- tcg_out_ldst(s, arg, arg1, arg2, STW);
- else
- tcg_out_ldst(s, arg, arg1, arg2, STX);
+ tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
+}
+
+static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
+ tcg_target_long arg)
+{
+ if (!check_fit_tl(arg, 10)) {
+ tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
+ }
+ tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
}
static inline void tcg_out_sety(TCGContext *s, int rs)
@@ -442,20 +472,21 @@ static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
if (check_fit_tl(val, 13))
tcg_out_arithi(s, reg, reg, val, ARITH_ADD);
else {
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, val);
- tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_ADD);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val);
+ tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD);
}
}
}
-static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
+ tcg_target_long val)
{
if (val != 0) {
if (check_fit_tl(val, 13))
- tcg_out_arithi(s, reg, reg, val, ARITH_AND);
+ tcg_out_arithi(s, rd, rs, val, ARITH_AND);
else {
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
- tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND);
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T1, val);
+ tcg_out_arith(s, rd, rs, TCG_REG_T1, ARITH_AND);
}
}
}
@@ -467,8 +498,8 @@ static void tcg_out_div32(TCGContext *s, int rd, int rs1,
if (uns) {
tcg_out_sety(s, TCG_REG_G0);
} else {
- tcg_out_arithi(s, TCG_REG_I5, rs1, 31, SHIFT_SRA);
- tcg_out_sety(s, TCG_REG_I5);
+ tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
+ tcg_out_sety(s, TCG_REG_T1);
}
tcg_out_arithc(s, rd, rs1, val2, val2const,
@@ -480,37 +511,7 @@ static inline void tcg_out_nop(TCGContext *s)
tcg_out_sethi(s, TCG_REG_G0, 0);
}
-static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
-{
- TCGLabel *l = &s->labels[label_index];
-
- if (l->has_value) {
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2)
- | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr)));
- } else {
- tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0));
- }
-}
-
-#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
-{
- TCGLabel *l = &s->labels[label_index];
-
- if (l->has_value) {
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
- (0x5 << 19) |
- INSN_OFF19(l->u.value - (unsigned long)s->code_ptr)));
- } else {
- tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
- tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | 0));
- }
-}
-#endif
-
-static const uint8_t tcg_cond_to_bcond[10] = {
+static const uint8_t tcg_cond_to_bcond[] = {
[TCG_COND_EQ] = COND_E,
[TCG_COND_NE] = COND_NE,
[TCG_COND_LT] = COND_L,
@@ -523,70 +524,144 @@ static const uint8_t tcg_cond_to_bcond[10] = {
[TCG_COND_GTU] = COND_GU,
};
+static const uint8_t tcg_cond_to_rcond[] = {
+ [TCG_COND_EQ] = RCOND_Z,
+ [TCG_COND_NE] = RCOND_NZ,
+ [TCG_COND_LT] = RCOND_LZ,
+ [TCG_COND_GT] = RCOND_GZ,
+ [TCG_COND_LE] = RCOND_LEZ,
+ [TCG_COND_GE] = RCOND_GEZ
+};
+
+static void tcg_out_bpcc0(TCGContext *s, int scond, int flags, int off19)
+{
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(1) | INSN_COND(scond) | flags | off19);
+}
+
+static void tcg_out_bpcc(TCGContext *s, int scond, int flags, int label)
+{
+ TCGLabel *l = &s->labels[label];
+ int off19;
+
+ if (l->has_value) {
+ off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
+ } else {
+ /* Make sure to preserve destinations during retranslation. */
+ off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label, 0);
+ }
+ tcg_out_bpcc0(s, scond, flags, off19);
+}
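
(Standalone sketch, not part of this patch.) The masking above and in patch_reloc works on the same 19-bit displacement field: insn & INSN_OFF19(-1) extracts the displacement already present in the instruction, and (insn & ~INSN_OFF19(-1)) | INSN_OFF19(value) rewrites only that field, which is why a previously resolved destination survives retranslation.

    #include <assert.h>
    #include <stdint.h>

    #define INSN_OFF19(x)  (((x) >> 2) & 0x07ffff)

    int main(void)
    {
        /* 0x10480000 is an arbitrary stand-in for the fixed bits of a branch. */
        uint32_t insn = 0x10480000 | INSN_OFF19(0x100);

        /* Extract the existing displacement (what tcg_out_bpcc reuses while
           the label is still unresolved). */
        assert((insn & INSN_OFF19(-1)) == INSN_OFF19(0x100));

        /* Re-point the branch without disturbing the other fields (what
           patch_reloc does for R_SPARC_WDISP19). */
        insn = (insn & ~INSN_OFF19(-1)) | INSN_OFF19(0x2040);
        assert((insn & INSN_OFF19(-1)) == INSN_OFF19(0x2040));
        assert((insn & ~INSN_OFF19(-1)) == 0x10480000);
        return 0;
    }
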
+
static void tcg_out_cmp(TCGContext *s, TCGArg c1, TCGArg c2, int c2const)
{
tcg_out_arithc(s, TCG_REG_G0, c1, c2, c2const, ARITH_SUBCC);
}
-static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+static void tcg_out_brcond_i32(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int label)
{
tcg_out_cmp(s, arg1, arg2, const_arg2);
- tcg_out_branch_i32(s, tcg_cond_to_bcond[cond], label_index);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_ICC | BPCC_PT, label);
tcg_out_nop(s);
}
+static void tcg_out_movcc(TCGContext *s, TCGCond cond, int cc, TCGArg ret,
+ TCGArg v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVCC | cc | INSN_RD(ret)
+ | INSN_RS1(tcg_cond_to_bcond[cond])
+ | (v1const ? INSN_IMM11(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2, int c2const,
+ TCGArg v1, int v1const)
+{
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, v1, v1const);
+}
+
#if TCG_TARGET_REG_BITS == 64
-static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond,
- TCGArg arg1, TCGArg arg2, int const_arg2,
- int label_index)
+static void tcg_out_brcond_i64(TCGContext *s, TCGCond cond, TCGArg arg1,
+ TCGArg arg2, int const_arg2, int label)
{
- tcg_out_cmp(s, arg1, arg2, const_arg2);
- tcg_out_branch_i64(s, tcg_cond_to_bcond[cond], label_index);
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare. */
+ if (arg2 == 0 && !is_unsigned_cond(cond)) {
+ TCGLabel *l = &s->labels[label];
+ int off16;
+
+ if (l->has_value) {
+ off16 = INSN_OFF16(l->u.value - (unsigned long)s->code_ptr);
+ } else {
+ /* Make sure to preserve destinations during retranslation. */
+ off16 = *(uint32_t *)s->code_ptr & INSN_OFF16(-1);
+ tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP16, label, 0);
+ }
+ tcg_out32(s, INSN_OP(0) | INSN_OP2(3) | BPR_PT | INSN_RS1(arg1)
+ | INSN_COND(tcg_cond_to_rcond[cond]) | off16);
+ } else {
+ tcg_out_cmp(s, arg1, arg2, const_arg2);
+ tcg_out_bpcc(s, tcg_cond_to_bcond[cond], BPCC_XCC | BPCC_PT, label);
+ }
tcg_out_nop(s);
}
+
+static void tcg_out_movr(TCGContext *s, TCGCond cond, TCGArg ret, TCGArg c1,
+ TCGArg v1, int v1const)
+{
+ tcg_out32(s, ARITH_MOVR | INSN_RD(ret) | INSN_RS1(c1)
+ | (tcg_cond_to_rcond[cond] << 10)
+ | (v1const ? INSN_IMM10(v1) : INSN_RS2(v1)));
+}
+
+static void tcg_out_movcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
+ TCGArg c1, TCGArg c2, int c2const,
+ TCGArg v1, int v1const)
+{
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare.
+ Note that the immediate range is one bit smaller, so we must check
+ for that as well. */
+ if (c2 == 0 && !is_unsigned_cond(cond)
+ && (!v1const || check_fit_tl(v1, 10))) {
+ tcg_out_movr(s, cond, ret, c1, v1, v1const);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, v1, v1const);
+ }
+}
#else
static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
TCGArg al, TCGArg ah,
TCGArg bl, int blconst,
TCGArg bh, int bhconst, int label_dest)
{
- int cc, label_next = gen_new_label();
+ int scond, label_next = gen_new_label();
tcg_out_cmp(s, ah, bh, bhconst);
/* Note that we fill one of the delay slots with the second compare. */
switch (cond) {
case TCG_COND_EQ:
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
- tcg_out_branch_i32(s, cc, label_next);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
tcg_out_cmp(s, al, bl, blconst);
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_EQ], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ tcg_out_bpcc(s, COND_E, BPCC_ICC | BPCC_PT, label_dest);
break;
case TCG_COND_NE:
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
tcg_out_cmp(s, al, bl, blconst);
- tcg_out_branch_i32(s, cc, label_dest);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_dest);
break;
default:
- /* ??? One could fairly easily special-case 64-bit unsigned
- compares against 32-bit zero-extended constants. For instance,
- we know that (unsigned)AH < 0 is false and need not emit it.
- Similarly, (unsigned)AH > 0 being true implies AH != 0, so the
- second branch will never be taken. */
- cc = INSN_COND(tcg_cond_to_bcond[cond], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ scond = tcg_cond_to_bcond[tcg_high_cond(cond)];
+ tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
tcg_out_nop(s);
- cc = INSN_COND(tcg_cond_to_bcond[TCG_COND_NE], 0);
- tcg_out_branch_i32(s, cc, label_next);
+ tcg_out_bpcc(s, COND_NE, BPCC_ICC | BPCC_PT, label_next);
tcg_out_cmp(s, al, bl, blconst);
- cc = INSN_COND(tcg_cond_to_bcond[tcg_unsigned_cond(cond)], 0);
- tcg_out_branch_i32(s, cc, label_dest);
+ scond = tcg_cond_to_bcond[tcg_unsigned_cond(cond)];
+ tcg_out_bpcc(s, scond, BPCC_ICC | BPCC_PT, label_dest);
break;
}
tcg_out_nop(s);
@@ -598,47 +673,42 @@ static void tcg_out_brcond2_i32(TCGContext *s, TCGCond cond,
static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
TCGArg c1, TCGArg c2, int c2const)
{
- TCGArg t;
-
/* For 32-bit comparisons, we can play games with ADDX/SUBX. */
switch (cond) {
+ case TCG_COND_LTU:
+ case TCG_COND_GEU:
+ /* The result of the comparison is in the carry bit. */
+ break;
+
case TCG_COND_EQ:
case TCG_COND_NE:
+ /* For equality, we can transform to inequality vs zero. */
if (c2 != 0) {
tcg_out_arithc(s, ret, c1, c2, c2const, ARITH_XOR);
}
c1 = TCG_REG_G0, c2 = ret, c2const = 0;
- cond = (cond == TCG_COND_EQ ? TCG_COND_LEU : TCG_COND_LTU);
+ cond = (cond == TCG_COND_EQ ? TCG_COND_GEU : TCG_COND_LTU);
break;
case TCG_COND_GTU:
- case TCG_COND_GEU:
- if (c2const && c2 != 0) {
- tcg_out_movi_imm13(s, TCG_REG_I5, c2);
- c2 = TCG_REG_I5;
- }
- t = c1, c1 = c2, c2 = t, c2const = 0;
- cond = tcg_swap_cond(cond);
- break;
-
- case TCG_COND_LTU:
case TCG_COND_LEU:
- break;
+ /* If we don't need to load a constant into a register, we can
+ swap the operands on GTU/LEU. There's no benefit to loading
+ the constant into a temporary register. */
+ if (!c2const || c2 == 0) {
+ TCGArg t = c1;
+ c1 = c2;
+ c2 = t;
+ c2const = 0;
+ cond = tcg_swap_cond(cond);
+ break;
+ }
+ /* FALLTHRU */
default:
tcg_out_cmp(s, c1, c2, c2const);
-#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
tcg_out_movi_imm13(s, ret, 0);
- tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
- | INSN_RS1(tcg_cond_to_bcond[cond])
- | MOVCC_ICC | INSN_IMM11(1));
-#else
- t = gen_new_label();
- tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
- tcg_out_movi_imm13(s, ret, 1);
- tcg_out_movi_imm13(s, ret, 0);
- tcg_out_label(s, t, s->code_ptr);
-#endif
+ tcg_out_movcc(s, cond, MOVCC_ICC, ret, 1, 1);
return;
}
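
(Plain-C illustration, not the emitted SPARC code.) The two transformations above amount to: equality becomes an unsigned comparison of (c1 ^ c2) against zero, and LTU/GEU read the result straight out of the borrow produced by the compare, which is what the ADDX/SUBX games mentioned at the top of the function exploit.

    #include <assert.h>
    #include <stdint.h>

    /* setcond LTU: the borrow of (c1 - c2) is the result. */
    static int setcond_ltu(uint32_t c1, uint32_t c2)
    {
        return c1 < c2;
    }

    /* setcond EQ: fold to a comparison of (c1 ^ c2) against zero;
       "0 GEU t" is exactly "t == 0". */
    static int setcond_eq(uint32_t c1, uint32_t c2)
    {
        uint32_t t = c1 ^ c2;
        return !(0u < t);
    }

    int main(void)
    {
        assert(setcond_ltu(1, 2) == 1 && setcond_ltu(2, 1) == 0);
        assert(setcond_eq(7, 7) == 1 && setcond_eq(7, 9) == 0);
        return 0;
    }
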
@@ -654,11 +724,16 @@ static void tcg_out_setcond_i32(TCGContext *s, TCGCond cond, TCGArg ret,
static void tcg_out_setcond_i64(TCGContext *s, TCGCond cond, TCGArg ret,
TCGArg c1, TCGArg c2, int c2const)
{
- tcg_out_cmp(s, c1, c2, c2const);
- tcg_out_movi_imm13(s, ret, 0);
- tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
- | INSN_RS1(tcg_cond_to_bcond[cond])
- | MOVCC_XCC | INSN_IMM11(1));
+ /* For 64-bit signed comparisons vs zero, we can avoid the compare
+ if the input does not overlap the output. */
+ if (c2 == 0 && !is_unsigned_cond(cond) && c1 != ret) {
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movr(s, cond, ret, c1, 1, 1);
+ } else {
+ tcg_out_cmp(s, c1, c2, c2const);
+ tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_movcc(s, cond, MOVCC_XCC, ret, 1, 1);
+ }
}
#else
static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
@@ -666,56 +741,98 @@ static void tcg_out_setcond2_i32(TCGContext *s, TCGCond cond, TCGArg ret,
TCGArg bl, int blconst,
TCGArg bh, int bhconst)
{
- int lab;
+ int tmp = TCG_REG_T1;
+
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (ret != ah && (bhconst || ret != bh)) {
+ tmp = ret;
+ }
switch (cond) {
case TCG_COND_EQ:
- tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_I5, al, bl, blconst);
- tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst);
- tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_AND);
- break;
-
case TCG_COND_NE:
- tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_I5, al, al, blconst);
- tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst);
- tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_OR);
+ if (bl == 0 && bh == 0) {
+ if (cond == TCG_COND_EQ) {
+ tcg_out_arith(s, TCG_REG_G0, al, ah, ARITH_ORCC);
+ tcg_out_movi(s, TCG_TYPE_I32, ret, 1);
+ } else {
+ tcg_out_arith(s, ret, al, ah, ARITH_ORCC);
+ }
+ } else {
+ tcg_out_setcond_i32(s, cond, tmp, al, bl, blconst);
+ tcg_out_cmp(s, ah, bh, bhconst);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
+ }
+ tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, cond == TCG_COND_NE, 1);
break;
default:
- lab = gen_new_label();
-
+ /* <= : ah < bh | (ah == bh && al <= bl) */
+ tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), tmp, al, bl, blconst);
tcg_out_cmp(s, ah, bh, bhconst);
- tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), lab);
- tcg_out_movi_imm13(s, ret, 1);
- tcg_out_branch_i32(s, INSN_COND(COND_NE, 1), lab);
- tcg_out_movi_imm13(s, ret, 0);
+ tcg_out_mov(s, TCG_TYPE_I32, ret, tmp);
+ tcg_out_movcc(s, TCG_COND_NE, MOVCC_ICC, ret, 0, 1);
+ tcg_out_movcc(s, tcg_high_cond(cond), MOVCC_ICC, ret, 1, 1);
+ break;
+ }
+}
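
(Standalone check, unsigned case only, not part of this patch.) The default case follows the decomposition noted in its comment, and comparing via 32-bit halves agrees with the native 64-bit comparison; in the backend the low words use tcg_unsigned_cond() and the high words use tcg_high_cond(), so signed conditions are honoured on the high half.

    #include <assert.h>
    #include <stdint.h>

    /* (ah:al) <=u (bh:bl), built only from 32-bit comparisons. */
    static int leu64_from_halves(uint32_t ah, uint32_t al,
                                 uint32_t bh, uint32_t bl)
    {
        return ah < bh || (ah == bh && al <= bl);
    }

    int main(void)
    {
        uint64_t a = 0x00000001ffffffffULL;
        uint64_t b = 0x0000000200000000ULL;
        assert(leu64_from_halves(a >> 32, (uint32_t)a,
                                 b >> 32, (uint32_t)b) == (a <= b));
        assert(leu64_from_halves(b >> 32, (uint32_t)b,
                                 a >> 32, (uint32_t)a) == (b <= a));
        assert(leu64_from_halves(a >> 32, (uint32_t)a,
                                 a >> 32, (uint32_t)a) == 1);
        return 0;
    }
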
- tcg_out_setcond_i32(s, tcg_unsigned_cond(cond), ret, al, bl, blconst);
+static void tcg_out_addsub2(TCGContext *s, TCGArg rl, TCGArg rh,
+ TCGArg al, TCGArg ah, TCGArg bl, int blconst,
+ TCGArg bh, int bhconst, int opl, int oph)
+{
+ TCGArg tmp = TCG_REG_T1;
- tcg_out_label(s, lab, s->code_ptr);
- break;
+ /* Note that the low parts are fully consumed before tmp is set. */
+ if (rl != ah && (bhconst || rl != bh)) {
+ tmp = rl;
}
+
+ tcg_out_arithc(s, tmp, al, bl, blconst, opl);
+ tcg_out_arithc(s, rh, ah, bh, bhconst, oph);
+ tcg_out_mov(s, TCG_TYPE_I32, rl, tmp);
}
#endif
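
(Plain-C sketch, not part of this patch.) add2_i32 above splits a 64-bit add into a low-half add that produces a carry (ARITH_ADDCC) and a high-half add that consumes it (ARITH_ADDX); sub2_i32 does the same with the borrow.

    #include <assert.h>
    #include <stdint.h>

    /* 64-bit addition from 32-bit halves: low part first, carry into high. */
    static void add2(uint32_t *rl, uint32_t *rh,
                     uint32_t al, uint32_t ah, uint32_t bl, uint32_t bh)
    {
        uint32_t lo = al + bl;
        uint32_t carry = lo < al;   /* what ADDCC leaves in the carry flag */
        *rl = lo;
        *rh = ah + bh + carry;      /* what ADDX folds back in */
    }

    int main(void)
    {
        uint32_t rl, rh;
        add2(&rl, &rh, 0xffffffffu, 0x1, 0x2, 0x3);  /* 0x1ffffffff + 0x300000002 */
        assert(rl == 0x1 && rh == 0x5);              /* = 0x500000001 */
        return 0;
    }
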
/* Generate global QEMU prologue and epilogue code */
static void tcg_target_qemu_prologue(TCGContext *s)
{
- tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_CALL_STACK_OFFSET,
- CPU_TEMP_BUF_NLONGS * (int)sizeof(long));
+ int tmp_buf_size, frame_size;
+
+ /* The TCG temp buffer is at the top of the frame, immediately
+ below the frame pointer. */
+ tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
+ tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
+ tmp_buf_size);
+
+ /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
+ otherwise the minimal frame usable by callees. */
+ frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
+ frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
+ frame_size += TCG_TARGET_STACK_ALIGN - 1;
+ frame_size &= -TCG_TARGET_STACK_ALIGN;
tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
- INSN_IMM13(-(TCG_TARGET_STACK_MINFRAME +
- CPU_TEMP_BUF_NLONGS * (int)sizeof(long))));
+ INSN_IMM13(-frame_size));
+
+#ifdef CONFIG_USE_GUEST_BASE
+ if (GUEST_BASE != 0) {
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+ tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+ }
+#endif
+
tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I1) |
INSN_RS2(TCG_REG_G0));
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_I0);
+ /* delay slot */
+ tcg_out_nop(s);
+
+ /* No epilogue required. We issue ret + restore directly in the TB. */
}
#if defined(CONFIG_SOFTMMU)
#include "../../softmmu_defs.h"
-#ifdef CONFIG_TCG_PASS_AREG0
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
int mmu_idx) */
static const void * const qemu_ld_helpers[4] = {
@@ -733,441 +850,307 @@ static const void * const qemu_st_helpers[4] = {
helper_stl_mmu,
helper_stq_mmu,
};
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
- mmu_idx) */
-static const void * const qemu_ld_helpers[4] = {
- __ldb_mmu,
- __ldw_mmu,
- __ldl_mmu,
- __ldq_mmu,
-};
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
- int mmu_idx) */
-static const void * const qemu_st_helpers[4] = {
- __stb_mmu,
- __stw_mmu,
- __stl_mmu,
- __stq_mmu,
-};
-#endif
-#endif
+/* Perform the TLB load and compare.
-#if TARGET_LONG_BITS == 32
-#define TARGET_LD_OP LDUW
-#else
-#define TARGET_LD_OP LDX
-#endif
+ Inputs:
+ ADDRLO_IDX contains the index into ARGS of the low part of the
+   address; the high part of the address is at ADDRLO_IDX+1.
-#if defined(CONFIG_SOFTMMU)
-#if HOST_LONG_BITS == 32
-#define TARGET_ADDEND_LD_OP LDUW
+ MEM_INDEX and S_BITS are the memory context and log2 size of the load.
+
+ WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+   This should be offsetof(CPUTLBEntry, addr_read) or addr_write.
+
+ The result of the TLB comparison is in %[ix]cc. The sanitized address
+ is in the returned register, maybe %o0. The TLB addend is in %o1. */
+
+static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
+ int s_bits, const TCGArg *args, int which)
+{
+ const int addrlo = args[addrlo_idx];
+ const int r0 = TCG_REG_O0;
+ const int r1 = TCG_REG_O1;
+ const int r2 = TCG_REG_O2;
+ int addr = addrlo;
+ int tlb_ofs;
+
+ if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+ /* Assemble the 64-bit address in R0. */
+ tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX);
+ tcg_out_arith(s, r0, r0, r1, ARITH_OR);
+ }
+
+ /* Shift the page number down to tlb-entry. */
+ tcg_out_arithi(s, r1, addrlo,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
+
+ /* Mask out the page offset, except for the required alignment. */
+ tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+
+ /* Compute tlb index, modulo tlb size. */
+ tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+
+ /* Relative to the current ENV. */
+ tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
+
+ /* Find a base address that can load both tlb comparator and addend. */
+ tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
+ if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
+ tcg_out_addi(s, r1, tlb_ofs);
+ tlb_ofs = 0;
+ }
+
+ /* Load the tlb comparator and the addend. */
+ tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
+ tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
+
+ /* subcc arg0, arg2, %g0 */
+ tcg_out_cmp(s, r0, r2, 0);
+
+ /* If the guest address must be zero-extended, do so now. */
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+ return r0;
+ }
+ return addrlo;
+}
+#endif /* CONFIG_SOFTMMU */
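
(Standalone sketch with illustrative constants only; the real values come from the QEMU build configuration.) The shift-and-mask pair in tcg_out_tlb_load computes the byte offset of the TLB entry directly: one shift by TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS and a mask with a pre-scaled constant, instead of computing an index and scaling it afterwards. Both forms agree:

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative constants, not the real configuration values. */
    #define TARGET_PAGE_BITS    12
    #define CPU_TLB_ENTRY_BITS  5
    #define CPU_TLB_SIZE        256

    int main(void)
    {
        uint64_t addr = 0xdeadbeef123ULL;

        /* Straightforward form: index into the TLB, then scale by entry size. */
        uint64_t ofs_a = ((addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1))
                         << CPU_TLB_ENTRY_BITS;

        /* Form emitted above: one shorter shift plus a pre-scaled mask. */
        uint64_t ofs_b = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS))
                         & ((CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);

        assert(ofs_a == ofs_b);
        return 0;
    }
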
+
+static const int qemu_ld_opc[8] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
#else
-#define TARGET_ADDEND_LD_OP LDX
-#endif
+ LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
#endif
+};
-#ifdef __arch64__
-#define HOST_LD_OP LDX
-#define HOST_ST_OP STX
-#define HOST_SLL_OP SHIFT_SLLX
-#define HOST_SRA_OP SHIFT_SRAX
+static const int qemu_st_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+ STB, STH, STW, STX
#else
-#define HOST_LD_OP LDUW
-#define HOST_ST_OP STW
-#define HOST_SLL_OP SHIFT_SLL
-#define HOST_SRA_OP SHIFT_SRA
+ STB, STH_LE, STW_LE, STX_LE
#endif
+};
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
- int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
{
- int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+ int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
- uint32_t *label1_ptr, *label2_ptr;
+ int memi_idx, memi, s_bits, n;
+ uint32_t *label_ptr[2];
#endif
- data_reg = *args++;
- addr_reg = *args++;
- mem_index = *args;
- s_bits = opc & 3;
-
- arg0 = TCG_REG_O0;
- arg1 = TCG_REG_O1;
- arg2 = TCG_REG_O2;
+ datahi = datalo = args[0];
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ datahi = args[1];
+ addrlo_idx = 2;
+ }
#if defined(CONFIG_SOFTMMU)
- /* srl addr_reg, x, arg1 */
- tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
- SHIFT_SRL);
- /* and addr_reg, x, arg0 */
- tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
- ARITH_AND);
-
- /* and arg1, x, arg1 */
- tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
- /* add arg1, x, arg1 */
- tcg_out_addi(s, arg1, offsetof(CPUArchState,
- tlb_table[mem_index][0].addr_read));
+ memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+ memi = args[memi_idx];
+ s_bits = sizeop & 3;
+
+ addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
+ offsetof(CPUTLBEntry, addr_read));
+
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ int reg64;
+
+ /* bne,pn %[xi]cc, label0 */
+ label_ptr[0] = (uint32_t *)s->code_ptr;
+ tcg_out_bpcc0(s, COND_NE, BPCC_PN
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+
+ /* TLB Hit. */
+ /* Load all 64-bits into an O/G register. */
+ reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
+ tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+
+ /* Move the two 32-bit pieces into the destination registers. */
+ tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+ if (reg64 != datalo) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+ }
- /* add env, arg1, arg1 */
- tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+ /* b,a,pt label1 */
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ tcg_out_bpcc0(s, COND_A, BPCC_A | BPCC_PT, 0);
+ } else {
+ /* The fast path is exactly one insn. Thus we can perform the
+ entire TLB Hit in the (annulled) delay slot of the branch
+ over the TLB Miss case. */
+
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr[0] = NULL;
+ label_ptr[1] = (uint32_t *)s->code_ptr;
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+ /* delay slot */
+ tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+ }
- /* ld [arg1], arg2 */
- tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
- INSN_RS2(TCG_REG_G0));
+ /* TLB Miss. */
- /* subcc arg0, arg2, %g0 */
- tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
- /* will become:
- be label1
- or
- be,pt %xcc label1 */
- label1_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* mov (delay slot) */
- tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
- /* mov */
- tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-#endif
+ if (label_ptr[0]) {
+ *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr[0]);
+ }
+ n = 0;
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx + 1]);
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx]);
- /* XXX: move that code at the end of the TB */
/* qemu_ld_helper[s_bits](arg0, arg1) */
tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
- /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
- global registers */
- // delay slot
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_ST_OP);
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_LD_OP);
-
- /* data_reg = sign_extend(arg0) */
- switch(opc) {
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
+
+ n = tcg_target_call_oarg_regs[0];
+ /* datalo = sign_extend(arg0) */
+ switch (sizeop) {
case 0 | 4:
- /* sll arg0, 24/56, data_reg */
- tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8,
- HOST_SLL_OP);
- /* sra data_reg, 24/56, data_reg */
- tcg_out_arithi(s, data_reg, data_reg,
- (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP);
+ /* Recall that SRA sign extends from bit 31 through bit 63. */
+ tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
+ tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA);
break;
case 1 | 4:
- /* sll arg0, 16/48, data_reg */
- tcg_out_arithi(s, data_reg, arg0,
- (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
- /* sra data_reg, 16/48, data_reg */
- tcg_out_arithi(s, data_reg, data_reg,
- (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP);
+ tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL);
+ tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA);
break;
case 2 | 4:
- /* sll arg0, 32, data_reg */
- tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP);
- /* sra data_reg, 32, data_reg */
- tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
+ tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA);
break;
+ case 3:
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_mov(s, TCG_TYPE_REG, datahi, n);
+ tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1);
+ break;
+ }
+ /* FALLTHRU */
case 0:
case 1:
case 2:
- case 3:
default:
/* mov */
- tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
+ tcg_out_mov(s, TCG_TYPE_REG, datalo, n);
break;
}
- /* will become:
- ba label2 */
- label2_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* nop (delay slot */
- tcg_out_nop(s);
-
- /* label1: */
-#if TARGET_LONG_BITS == 32
- /* be label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#else
- /* be,pt %xcc label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#endif
-
- /* ld [arg1 + x], arg1 */
- tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
- offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
- /* and addr_reg, x, arg0 */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
- tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
- /* add arg0, arg1, arg0 */
- tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
+ *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr[1]);
#else
- /* add addr_reg, arg1, arg0 */
- tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
+ addr_reg = args[addrlo_idx];
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+ addr_reg = TCG_REG_T1;
+ }
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
-#else
- arg0 = addr_reg;
-#endif
+ tcg_out_ldst_rr(s, reg64, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[sizeop]);
- switch(opc) {
- case 0:
- /* ldub [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDUB);
- break;
- case 0 | 4:
- /* ldsb [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDSB);
- break;
- case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* lduh [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDUH);
-#else
- /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 1 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* ldsh [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDSH);
-#else
- /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* lduw [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDUW);
-#else
- /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 2 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* ldsw [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDSW);
-#else
- /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* ldx [arg0], data_reg */
- tcg_out_ldst(s, data_reg, arg0, 0, LDX);
-#else
- /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- default:
- tcg_abort();
+ tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+ if (reg64 != datalo) {
+ tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+ }
+ } else {
+ tcg_out_ldst_rr(s, datalo, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_ld_opc[sizeop]);
}
-
-#if defined(CONFIG_SOFTMMU)
- /* label2: */
- *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label2_ptr));
-#endif
+#endif /* CONFIG_SOFTMMU */
}
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
- int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
{
- int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+ int addrlo_idx = 1, datalo, datahi, addr_reg;
#if defined(CONFIG_SOFTMMU)
- uint32_t *label1_ptr, *label2_ptr;
+ int memi_idx, memi, n, datafull;
+ uint32_t *label_ptr;
#endif
- data_reg = *args++;
- addr_reg = *args++;
- mem_index = *args;
-
- s_bits = opc;
-
- arg0 = TCG_REG_O0;
- arg1 = TCG_REG_O1;
- arg2 = TCG_REG_O2;
+ datahi = datalo = args[0];
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ datahi = args[1];
+ addrlo_idx = 2;
+ }
#if defined(CONFIG_SOFTMMU)
- /* srl addr_reg, x, arg1 */
- tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
- SHIFT_SRL);
-
- /* and addr_reg, x, arg0 */
- tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
- ARITH_AND);
-
- /* and arg1, x, arg1 */
- tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
- /* add arg1, x, arg1 */
- tcg_out_addi(s, arg1, offsetof(CPUArchState,
- tlb_table[mem_index][0].addr_write));
-
- /* add env, arg1, arg1 */
- tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+ memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+ memi = args[memi_idx];
+
+ addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
+ offsetof(CPUTLBEntry, addr_write));
+
+ datafull = datalo;
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ /* Reconstruct the full 64-bit value. */
+ tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+ tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+ datafull = TCG_REG_O2;
+ }
- /* ld [arg1], arg2 */
- tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
- INSN_RS2(TCG_REG_G0));
+ /* The fast path is exactly one insn. Thus we can perform the entire
+ TLB Hit in the (annulled) delay slot of the branch over TLB Miss. */
+ /* beq,a,pt %[xi]cc, label0 */
+ label_ptr = (uint32_t *)s->code_ptr;
+ tcg_out_bpcc0(s, COND_E, BPCC_A | BPCC_PT
+ | (TARGET_LONG_BITS == 64 ? BPCC_XCC : BPCC_ICC), 0);
+ /* delay slot */
+ tcg_out_ldst_rr(s, datafull, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
+
+ /* TLB Miss. */
+
+ n = 0;
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx + 1]);
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+ args[addrlo_idx]);
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
+ }
+ tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
- /* subcc arg0, arg2, %g0 */
- tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
- /* will become:
- be label1
- or
- be,pt %xcc label1 */
- label1_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* mov (delay slot) */
- tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
- /* mov */
- tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
-
- /* mov */
- tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
-
-#ifdef CONFIG_TCG_PASS_AREG0
- /* XXX/FIXME: suboptimal */
- tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
- tcg_target_call_iarg_regs[2]);
- tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
- tcg_target_call_iarg_regs[1]);
- tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
- tcg_target_call_iarg_regs[0]);
- tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
- TCG_AREG0);
-#endif
- /* XXX: move that code at the end of the TB */
/* qemu_st_helper[s_bits](arg0, arg1, arg2) */
- tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
+ tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
- /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
- global registers */
- // delay slot
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_ST_OP);
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_LD_OP);
-
- /* will become:
- ba label2 */
- label2_ptr = (uint32_t *)s->code_ptr;
- tcg_out32(s, 0);
-
- /* nop (delay slot) */
- tcg_out_nop(s);
-
-#if TARGET_LONG_BITS == 32
- /* be label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#else
- /* be,pt %xcc label1 */
- *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
- (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
- (unsigned long)label1_ptr));
-#endif
-
- /* ld [arg1 + x], arg1 */
- tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
- offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
- /* and addr_reg, x, arg0 */
- tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
- tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
- /* add arg0, arg1, arg0 */
- tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
-#else
- /* add addr_reg, arg1, arg0 */
- tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
-
-#else
- arg0 = addr_reg;
-#endif
+ /* delay slot */
+ tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
- switch(opc) {
- case 0:
- /* stb data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STB);
- break;
- case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* sth data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STH);
-#else
- /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* stw data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STW);
-#else
- /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
- /* stx data_reg, [arg0] */
- tcg_out_ldst(s, data_reg, arg0, 0, STX);
+ *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
+ (unsigned long)label_ptr);
#else
- /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */
- tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE);
-#endif
- break;
- default:
- tcg_abort();
+ addr_reg = args[addrlo_idx];
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+ tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+ addr_reg = TCG_REG_T1;
}
-
-#if defined(CONFIG_SOFTMMU)
- /* label2: */
- *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
- INSN_OFF22((unsigned long)s->code_ptr -
- (unsigned long)label2_ptr));
-#endif
+ if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+ tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
+ tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+ tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+ datalo = TCG_REG_O2;
+ }
+ tcg_out_ldst_rr(s, datalo, addr_reg,
+ (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+ qemu_st_opc[sizeop]);
+#endif /* CONFIG_SOFTMMU */
}
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
@@ -1186,43 +1169,36 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
/* direct jump method */
- tcg_out_sethi(s, TCG_REG_I5, args[0] & 0xffffe000);
- tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
- INSN_IMM13((args[0] & 0x1fff)));
+ uint32_t old_insn = *(uint32_t *)s->code_ptr;
s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+ /* Make sure to preserve links during retranslation. */
+ tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1)));
} else {
/* indirect jump method */
- tcg_out_ld_ptr(s, TCG_REG_I5, (tcg_target_long)(s->tb_next + args[0]));
- tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
+ tcg_out_ld_ptr(s, TCG_REG_T1,
+ (tcg_target_long)(s->tb_next + args[0]));
+ tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) |
INSN_RS2(TCG_REG_G0));
}
tcg_out_nop(s);
s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
break;
case INDEX_op_call:
- if (const_args[0])
+ if (const_args[0]) {
tcg_out32(s, CALL | ((((tcg_target_ulong)args[0]
- (tcg_target_ulong)s->code_ptr) >> 2)
& 0x3fffffff));
- else {
- tcg_out_ld_ptr(s, TCG_REG_I5,
+ } else {
+ tcg_out_ld_ptr(s, TCG_REG_T1,
(tcg_target_long)(s->tb_next + args[0]));
- tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_I5) |
+ tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_T1) |
INSN_RS2(TCG_REG_G0));
}
- /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
- global registers */
- // delay slot
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_ST_OP);
- tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
- sizeof(long), HOST_LD_OP);
- break;
- case INDEX_op_jmp:
+ /* delay slot */
+ tcg_out_nop(s);
+ break;
case INDEX_op_br:
- tcg_out_branch_i32(s, COND_A, args[0]);
+ tcg_out_bpcc(s, COND_A, BPCC_PT, args[0]);
tcg_out_nop(s);
break;
case INDEX_op_movi_i32:
@@ -1290,13 +1266,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
goto gen_arith;
case INDEX_op_shl_i32:
c = SHIFT_SLL;
- goto gen_arith;
+ do_shift32:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c);
+ break;
case INDEX_op_shr_i32:
c = SHIFT_SRL;
- goto gen_arith;
+ goto do_shift32;
case INDEX_op_sar_i32:
c = SHIFT_SRA;
- goto gen_arith;
+ goto do_shift32;
case INDEX_op_mul_i32:
c = ARITH_UMUL;
goto gen_arith;
@@ -1317,11 +1296,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_rem_i32:
case INDEX_op_remu_i32:
- tcg_out_div32(s, TCG_REG_I5, args[1], args[2], const_args[2],
+ tcg_out_div32(s, TCG_REG_T1, args[1], args[2], const_args[2],
opc == INDEX_op_remu_i32);
- tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+ tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
ARITH_UMUL);
- tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+ tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
break;
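
(Sanity check of the identity, not part of this patch.) SPARC has divide but no modulo instruction, so rem_i32/remu_i32 above are synthesised as a - (a / b) * b:

    #include <assert.h>

    int main(void)
    {
        int a = 17, b = 5;
        assert(a % b == a - (a / b) * b);   /* remainder via div, mul, sub */
        return 0;
    }
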
case INDEX_op_brcond_i32:
@@ -1332,6 +1311,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_setcond_i32(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
+ case INDEX_op_movcond_i32:
+ tcg_out_movcond_i32(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3], const_args[3]);
+ break;
#if TCG_TARGET_REG_BITS == 32
case INDEX_op_brcond2_i32:
@@ -1345,16 +1328,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
args[4], const_args[4]);
break;
case INDEX_op_add2_i32:
- tcg_out_arithc(s, args[0], args[2], args[4], const_args[4],
- ARITH_ADDCC);
- tcg_out_arithc(s, args[1], args[3], args[5], const_args[5],
- ARITH_ADDX);
+ tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_ADDCC, ARITH_ADDX);
break;
case INDEX_op_sub2_i32:
- tcg_out_arithc(s, args[0], args[2], args[4], const_args[4],
- ARITH_SUBCC);
- tcg_out_arithc(s, args[1], args[3], args[5], const_args[5],
- ARITH_SUBX);
+ tcg_out_addsub2(s, args[0], args[1], args[2], args[3],
+ args[4], const_args[4], args[5], const_args[5],
+ ARITH_SUBCC, ARITH_SUBX);
break;
case INDEX_op_mulu2_i32:
tcg_out_arithc(s, args[0], args[2], args[3], const_args[3],
@@ -1386,6 +1367,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_qemu_ld(s, args, 2 | 4);
break;
#endif
+ case INDEX_op_qemu_ld64:
+ tcg_out_qemu_ld(s, args, 3);
+ break;
case INDEX_op_qemu_st8:
tcg_out_qemu_st(s, args, 0);
break;
@@ -1395,6 +1379,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_qemu_st32:
tcg_out_qemu_st(s, args, 2);
break;
+ case INDEX_op_qemu_st64:
+ tcg_out_qemu_st(s, args, 3);
+ break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_movi_i64:
@@ -1411,13 +1398,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
break;
case INDEX_op_shl_i64:
c = SHIFT_SLLX;
- goto gen_arith;
+ do_shift64:
+ /* Limit immediate shift count lest we create an illegal insn. */
+ tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c);
+ break;
case INDEX_op_shr_i64:
c = SHIFT_SRLX;
- goto gen_arith;
+ goto do_shift64;
case INDEX_op_sar_i64:
c = SHIFT_SRAX;
- goto gen_arith;
+ goto do_shift64;
case INDEX_op_mul_i64:
c = ARITH_MULX;
goto gen_arith;
@@ -1429,11 +1419,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
goto gen_arith;
case INDEX_op_rem_i64:
case INDEX_op_remu_i64:
- tcg_out_arithc(s, TCG_REG_I5, args[1], args[2], const_args[2],
+ tcg_out_arithc(s, TCG_REG_T1, args[1], args[2], const_args[2],
opc == INDEX_op_rem_i64 ? ARITH_SDIVX : ARITH_UDIVX);
- tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+ tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
ARITH_MULX);
- tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+ tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
break;
case INDEX_op_ext32s_i64:
if (const_args[1]) {
@@ -1458,14 +1448,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_setcond_i64(s, args[3], args[0], args[1],
args[2], const_args[2]);
break;
-
- case INDEX_op_qemu_ld64:
- tcg_out_qemu_ld(s, args, 3);
- break;
- case INDEX_op_qemu_st64:
- tcg_out_qemu_st(s, args, 3);
+ case INDEX_op_movcond_i64:
+ tcg_out_movcond_i64(s, args[5], args[0], args[1],
+ args[2], const_args[2], args[3], const_args[3]);
break;
-
#endif
gen_arith:
tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
@@ -1485,7 +1471,6 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_call, { "ri" } },
- { INDEX_op_jmp, { "ri" } },
{ INDEX_op_br, { } },
{ INDEX_op_mov_i32, { "r", "r" } },
@@ -1495,55 +1480,42 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_ld16u_i32, { "r", "r" } },
{ INDEX_op_ld16s_i32, { "r", "r" } },
{ INDEX_op_ld_i32, { "r", "r" } },
- { INDEX_op_st8_i32, { "r", "r" } },
- { INDEX_op_st16_i32, { "r", "r" } },
- { INDEX_op_st_i32, { "r", "r" } },
-
- { INDEX_op_add_i32, { "r", "r", "rJ" } },
- { INDEX_op_mul_i32, { "r", "r", "rJ" } },
- { INDEX_op_div_i32, { "r", "r", "rJ" } },
- { INDEX_op_divu_i32, { "r", "r", "rJ" } },
- { INDEX_op_rem_i32, { "r", "r", "rJ" } },
- { INDEX_op_remu_i32, { "r", "r", "rJ" } },
- { INDEX_op_sub_i32, { "r", "r", "rJ" } },
- { INDEX_op_and_i32, { "r", "r", "rJ" } },
- { INDEX_op_andc_i32, { "r", "r", "rJ" } },
- { INDEX_op_or_i32, { "r", "r", "rJ" } },
- { INDEX_op_orc_i32, { "r", "r", "rJ" } },
- { INDEX_op_xor_i32, { "r", "r", "rJ" } },
-
- { INDEX_op_shl_i32, { "r", "r", "rJ" } },
- { INDEX_op_shr_i32, { "r", "r", "rJ" } },
- { INDEX_op_sar_i32, { "r", "r", "rJ" } },
+ { INDEX_op_st8_i32, { "rZ", "r" } },
+ { INDEX_op_st16_i32, { "rZ", "r" } },
+ { INDEX_op_st_i32, { "rZ", "r" } },
+
+ { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_div_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_divu_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_rem_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_remu_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_and_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_andc_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_or_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_orc_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_xor_i32, { "r", "rZ", "rJ" } },
+
+ { INDEX_op_shl_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_shr_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_sar_i32, { "r", "rZ", "rJ" } },
{ INDEX_op_neg_i32, { "r", "rJ" } },
{ INDEX_op_not_i32, { "r", "rJ" } },
- { INDEX_op_brcond_i32, { "r", "rJ" } },
- { INDEX_op_setcond_i32, { "r", "r", "rJ" } },
+ { INDEX_op_brcond_i32, { "rZ", "rJ" } },
+ { INDEX_op_setcond_i32, { "r", "rZ", "rJ" } },
+ { INDEX_op_movcond_i32, { "r", "rZ", "rJ", "rI", "0" } },
#if TCG_TARGET_REG_BITS == 32
- { INDEX_op_brcond2_i32, { "r", "r", "rJ", "rJ" } },
- { INDEX_op_setcond2_i32, { "r", "r", "r", "rJ", "rJ" } },
- { INDEX_op_add2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
- { INDEX_op_sub2_i32, { "r", "r", "r", "r", "rJ", "rJ" } },
- { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
-#endif
-
- { INDEX_op_qemu_ld8u, { "r", "L" } },
- { INDEX_op_qemu_ld8s, { "r", "L" } },
- { INDEX_op_qemu_ld16u, { "r", "L" } },
- { INDEX_op_qemu_ld16s, { "r", "L" } },
- { INDEX_op_qemu_ld32, { "r", "L" } },
-#if TCG_TARGET_REG_BITS == 64
- { INDEX_op_qemu_ld32u, { "r", "L" } },
- { INDEX_op_qemu_ld32s, { "r", "L" } },
+ { INDEX_op_brcond2_i32, { "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+ { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rJ" } },
#endif
- { INDEX_op_qemu_st8, { "L", "L" } },
- { INDEX_op_qemu_st16, { "L", "L" } },
- { INDEX_op_qemu_st32, { "L", "L" } },
-
#if TCG_TARGET_REG_BITS == 64
{ INDEX_op_mov_i64, { "r", "r" } },
{ INDEX_op_movi_i64, { "r" } },
@@ -1554,29 +1526,27 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_ld32u_i64, { "r", "r" } },
{ INDEX_op_ld32s_i64, { "r", "r" } },
{ INDEX_op_ld_i64, { "r", "r" } },
- { INDEX_op_st8_i64, { "r", "r" } },
- { INDEX_op_st16_i64, { "r", "r" } },
- { INDEX_op_st32_i64, { "r", "r" } },
- { INDEX_op_st_i64, { "r", "r" } },
- { INDEX_op_qemu_ld64, { "L", "L" } },
- { INDEX_op_qemu_st64, { "L", "L" } },
-
- { INDEX_op_add_i64, { "r", "r", "rJ" } },
- { INDEX_op_mul_i64, { "r", "r", "rJ" } },
- { INDEX_op_div_i64, { "r", "r", "rJ" } },
- { INDEX_op_divu_i64, { "r", "r", "rJ" } },
- { INDEX_op_rem_i64, { "r", "r", "rJ" } },
- { INDEX_op_remu_i64, { "r", "r", "rJ" } },
- { INDEX_op_sub_i64, { "r", "r", "rJ" } },
- { INDEX_op_and_i64, { "r", "r", "rJ" } },
- { INDEX_op_andc_i64, { "r", "r", "rJ" } },
- { INDEX_op_or_i64, { "r", "r", "rJ" } },
- { INDEX_op_orc_i64, { "r", "r", "rJ" } },
- { INDEX_op_xor_i64, { "r", "r", "rJ" } },
-
- { INDEX_op_shl_i64, { "r", "r", "rJ" } },
- { INDEX_op_shr_i64, { "r", "r", "rJ" } },
- { INDEX_op_sar_i64, { "r", "r", "rJ" } },
+ { INDEX_op_st8_i64, { "rZ", "r" } },
+ { INDEX_op_st16_i64, { "rZ", "r" } },
+ { INDEX_op_st32_i64, { "rZ", "r" } },
+ { INDEX_op_st_i64, { "rZ", "r" } },
+
+ { INDEX_op_add_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_mul_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_div_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_divu_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_rem_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_remu_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_sub_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_and_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_andc_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_or_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_orc_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_xor_i64, { "r", "rZ", "rJ" } },
+
+ { INDEX_op_shl_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_shr_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_sar_i64, { "r", "rZ", "rJ" } },
{ INDEX_op_neg_i64, { "r", "rJ" } },
{ INDEX_op_not_i64, { "r", "rJ" } },
@@ -1584,9 +1554,51 @@ static const TCGTargetOpDef sparc_op_defs[] = {
{ INDEX_op_ext32s_i64, { "r", "ri" } },
{ INDEX_op_ext32u_i64, { "r", "ri" } },
- { INDEX_op_brcond_i64, { "r", "rJ" } },
- { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
+ { INDEX_op_brcond_i64, { "rZ", "rJ" } },
+ { INDEX_op_setcond_i64, { "r", "rZ", "rJ" } },
+ { INDEX_op_movcond_i64, { "r", "rZ", "rJ", "rI", "0" } },
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+ { INDEX_op_qemu_ld8u, { "r", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L" } },
+ { INDEX_op_qemu_ld32u, { "r", "L" } },
+ { INDEX_op_qemu_ld32s, { "r", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+ { INDEX_op_qemu_ld8u, { "r", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L" } },
+ { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L", "L" } },
+#else
+ { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld32, { "r", "L", "L" } },
+ { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
+
+ { INDEX_op_qemu_st8, { "L", "L", "L" } },
+ { INDEX_op_qemu_st16, { "L", "L", "L" } },
+ { INDEX_op_qemu_st32, { "L", "L", "L" } },
+ { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
#endif
+
{ -1 },
};
@@ -1613,25 +1625,23 @@ static void tcg_target_init(TCGContext *s)
(1 << TCG_REG_O7));
tcg_regset_clear(s->reserved_regs);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0);
-#if TCG_TARGET_REG_BITS == 64
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I4); // for internal use
-#endif
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I5); // for internal use
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6);
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_O7);
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+ tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+
tcg_add_target_add_op_defs(sparc_op_defs);
}
#if TCG_TARGET_REG_BITS == 64
# define ELF_HOST_MACHINE EM_SPARCV9
-#elif defined(__sparc_v8plus__)
+#else
# define ELF_HOST_MACHINE EM_SPARC32PLUS
# define ELF_HOST_FLAGS EF_SPARC_32PLUS
-#else
-# define ELF_HOST_MACHINE EM_SPARC
#endif
typedef struct {
@@ -1687,3 +1697,18 @@ void tcg_register_jit(void *buf, size_t buf_size)
tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+ uint32_t *ptr = (uint32_t *)jmp_addr;
+ tcg_target_long disp = (tcg_target_long)(addr - jmp_addr) >> 2;
+
+ /* We can reach the entire address space for 32-bit. For 64-bit
+ the code_gen_buffer can't be larger than 2GB. */
+ if (TCG_TARGET_REG_BITS == 64 && !check_fit_tl(disp, 30)) {
+ tcg_abort();
+ }
+
+ *ptr = CALL | (disp & 0x3fffffff);
+ flush_icache_range(jmp_addr, jmp_addr + 4);
+}
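
For reference: the instruction rewritten above is a SPARC CALL, whose low 30
bits are a signed word displacement -- hence the 2 GB limit mentioned in the
comment.  A minimal sketch of recovering the target from a patched call site,
assuming CALL is the usual op-01 encoding (0x40000000) used in this file; the
function name is illustrative only and not part of the patch:

    static uintptr_t example_call_target(uintptr_t jmp_addr)
    {
        uint32_t insn = *(uint32_t *)jmp_addr;
        /* Drop the 2-bit op field, then sign-extend the 30-bit displacement. */
        int32_t disp30 = (int32_t)(insn << 2) >> 2;
        /* The displacement counts 4-byte words, matching the >> 2 above. */
        return jmp_addr + ((intptr_t)disp30 << 2);
    }
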
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index ee2274d..0e7d398 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -62,26 +62,24 @@ typedef enum {
TCG_REG_I7,
} TCGReg;
-#define TCG_CT_CONST_S11 0x100
-#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_S11 0x100
+#define TCG_CT_CONST_S13 0x200
+#define TCG_CT_CONST_ZERO 0x400
/* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_I6
-#ifdef __arch64__
-// Reserve space for AREG0
-#define TCG_TARGET_STACK_MINFRAME (176 + 4 * (int)sizeof(long) + \
- TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET (2047 - 16)
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_REG_CALL_STACK TCG_REG_O6
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_STACK_BIAS 2047
+#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS)
#else
-// AREG0 + one word for alignment
-#define TCG_TARGET_STACK_MINFRAME (92 + (2 + 1) * (int)sizeof(long) + \
- TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET TCG_TARGET_STACK_MINFRAME
-#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_STACK_BIAS 0
+#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4)
#endif
-#ifdef __arch64__
+#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_EXTEND_ARGS 1
#endif
@@ -102,6 +100,7 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_movcond_i32 1
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_div_i64 1
@@ -123,16 +122,10 @@ typedef enum {
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 1
#endif
-/* Note: must be synced with dyngen-exec.h */
-#ifdef CONFIG_SOLARIS
-#define TCG_AREG0 TCG_REG_G2
-#elif defined(__sparc_v9__)
-#define TCG_AREG0 TCG_REG_G5
-#else
-#define TCG_AREG0 TCG_REG_G6
-#endif
+#define TCG_AREG0 TCG_REG_I0
static inline void flush_icache_range(tcg_target_ulong start,
tcg_target_ulong stop)
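
For reference, the 64-bit constants above follow the SPARC V9 ABI: %sp and %fp
are biased by 2047, the register window save area occupies 16*8 = 128 bytes,
and six 8-byte argument slots follow it, so TCG_TARGET_CALL_STACK_OFFSET works
out to 128 + 48 + 2047 = 2223.  A minimal sketch of the same arithmetic; the
names are illustrative only and not part of the patch:

    /* Illustrative only: where stack-passed call arguments begin relative to
       the biased %sp on 64-bit SPARC.  Mirrors TCG_TARGET_CALL_STACK_OFFSET. */
    enum {
        EX_STACK_BIAS  = 2047,    /* %sp/%fp bias mandated by the V9 ABI */
        EX_SAVE_AREA   = 16 * 8,  /* register window save area */
        EX_ARG_SLOTS   = 6 * 8,   /* six argument home slots */
        EX_CALL_OFFSET = EX_SAVE_AREA + EX_ARG_SLOTS + EX_STACK_BIAS  /* 2223 */
    };
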
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 169d3b2..0b3cb0b 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,335 +25,340 @@
int gen_new_label(void);
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+ *tcg_ctx.gen_opc_ptr++ = opc;
+}
+
static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
}
static inline void tcg_gen_op1_i64(TCGOpcode opc, TCGv_i64 arg1)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
}
static inline void tcg_gen_op1i(TCGOpcode opc, TCGArg arg1)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = arg1;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = arg1;
}
static inline void tcg_gen_op2_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
}
static inline void tcg_gen_op2_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
}
static inline void tcg_gen_op2i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGArg arg2)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = arg2;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = arg2;
}
static inline void tcg_gen_op2i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGArg arg2)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = arg2;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = arg2;
}
static inline void tcg_gen_op2ii(TCGOpcode opc, TCGArg arg1, TCGArg arg2)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = arg1;
- *gen_opparam_ptr++ = arg2;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = arg1;
+ *tcg_ctx.gen_opparam_ptr++ = arg2;
}
static inline void tcg_gen_op3_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
}
static inline void tcg_gen_op3_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
}
static inline void tcg_gen_op3i_i32(TCGOpcode opc, TCGv_i32 arg1,
TCGv_i32 arg2, TCGArg arg3)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = arg3;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = arg3;
}
static inline void tcg_gen_op3i_i64(TCGOpcode opc, TCGv_i64 arg1,
TCGv_i64 arg2, TCGArg arg3)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = arg3;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = arg3;
}
static inline void tcg_gen_ldst_op_i32(TCGOpcode opc, TCGv_i32 val,
TCGv_ptr base, TCGArg offset)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(val);
- *gen_opparam_ptr++ = GET_TCGV_PTR(base);
- *gen_opparam_ptr++ = offset;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_PTR(base);
+ *tcg_ctx.gen_opparam_ptr++ = offset;
}
static inline void tcg_gen_ldst_op_i64(TCGOpcode opc, TCGv_i64 val,
TCGv_ptr base, TCGArg offset)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(val);
- *gen_opparam_ptr++ = GET_TCGV_PTR(base);
- *gen_opparam_ptr++ = offset;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_PTR(base);
+ *tcg_ctx.gen_opparam_ptr++ = offset;
}
static inline void tcg_gen_qemu_ldst_op_i64_i32(TCGOpcode opc, TCGv_i64 val,
TCGv_i32 addr, TCGArg mem_index)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(val);
- *gen_opparam_ptr++ = GET_TCGV_I32(addr);
- *gen_opparam_ptr++ = mem_index;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(addr);
+ *tcg_ctx.gen_opparam_ptr++ = mem_index;
}
static inline void tcg_gen_qemu_ldst_op_i64_i64(TCGOpcode opc, TCGv_i64 val,
TCGv_i64 addr, TCGArg mem_index)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(val);
- *gen_opparam_ptr++ = GET_TCGV_I64(addr);
- *gen_opparam_ptr++ = mem_index;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(addr);
+ *tcg_ctx.gen_opparam_ptr++ = mem_index;
}
static inline void tcg_gen_op4_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
}
static inline void tcg_gen_op4_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3, TCGv_i64 arg4)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
}
static inline void tcg_gen_op4i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGArg arg4)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = arg4;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = arg4;
}
static inline void tcg_gen_op4i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3, TCGArg arg4)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = arg4;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = arg4;
}
static inline void tcg_gen_op4ii_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGArg arg3, TCGArg arg4)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = arg3;
- *gen_opparam_ptr++ = arg4;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = arg3;
+ *tcg_ctx.gen_opparam_ptr++ = arg4;
}
static inline void tcg_gen_op4ii_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGArg arg3, TCGArg arg4)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = arg3;
- *gen_opparam_ptr++ = arg4;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = arg3;
+ *tcg_ctx.gen_opparam_ptr++ = arg4;
}
static inline void tcg_gen_op5_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg5);
}
static inline void tcg_gen_op5_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg5);
}
static inline void tcg_gen_op5i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4, TCGArg arg5)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
- *gen_opparam_ptr++ = arg5;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = arg5;
}
static inline void tcg_gen_op5i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3, TCGv_i64 arg4, TCGArg arg5)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
- *gen_opparam_ptr++ = arg5;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = arg5;
}
static inline void tcg_gen_op5ii_i32(TCGOpcode opc, TCGv_i32 arg1,
TCGv_i32 arg2, TCGv_i32 arg3,
TCGArg arg4, TCGArg arg5)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = arg4;
- *gen_opparam_ptr++ = arg5;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = arg4;
+ *tcg_ctx.gen_opparam_ptr++ = arg5;
}
static inline void tcg_gen_op5ii_i64(TCGOpcode opc, TCGv_i64 arg1,
TCGv_i64 arg2, TCGv_i64 arg3,
TCGArg arg4, TCGArg arg5)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = arg4;
- *gen_opparam_ptr++ = arg5;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = arg4;
+ *tcg_ctx.gen_opparam_ptr++ = arg5;
}
static inline void tcg_gen_op6_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4, TCGv_i32 arg5,
TCGv_i32 arg6)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg6);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg6);
}
static inline void tcg_gen_op6_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3, TCGv_i64 arg4, TCGv_i64 arg5,
TCGv_i64 arg6)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg6);
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg6);
}
static inline void tcg_gen_op6i_i32(TCGOpcode opc, TCGv_i32 arg1, TCGv_i32 arg2,
TCGv_i32 arg3, TCGv_i32 arg4,
TCGv_i32 arg5, TCGArg arg6)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg5);
- *gen_opparam_ptr++ = arg6;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg5);
+ *tcg_ctx.gen_opparam_ptr++ = arg6;
}
static inline void tcg_gen_op6i_i64(TCGOpcode opc, TCGv_i64 arg1, TCGv_i64 arg2,
TCGv_i64 arg3, TCGv_i64 arg4,
TCGv_i64 arg5, TCGArg arg6)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg5);
- *gen_opparam_ptr++ = arg6;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg5);
+ *tcg_ctx.gen_opparam_ptr++ = arg6;
}
static inline void tcg_gen_op6ii_i32(TCGOpcode opc, TCGv_i32 arg1,
TCGv_i32 arg2, TCGv_i32 arg3,
TCGv_i32 arg4, TCGArg arg5, TCGArg arg6)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I32(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I32(arg4);
- *gen_opparam_ptr++ = arg5;
- *gen_opparam_ptr++ = arg6;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = arg5;
+ *tcg_ctx.gen_opparam_ptr++ = arg6;
}
static inline void tcg_gen_op6ii_i64(TCGOpcode opc, TCGv_i64 arg1,
TCGv_i64 arg2, TCGv_i64 arg3,
TCGv_i64 arg4, TCGArg arg5, TCGArg arg6)
{
- *gen_opc_ptr++ = opc;
- *gen_opparam_ptr++ = GET_TCGV_I64(arg1);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg2);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg3);
- *gen_opparam_ptr++ = GET_TCGV_I64(arg4);
- *gen_opparam_ptr++ = arg5;
- *gen_opparam_ptr++ = arg6;
+ *tcg_ctx.gen_opc_ptr++ = opc;
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg1);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg2);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg3);
+ *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(arg4);
+ *tcg_ctx.gen_opparam_ptr++ = arg5;
+ *tcg_ctx.gen_opparam_ptr++ = arg6;
}
static inline void gen_set_label(int n)
@@ -396,10 +401,10 @@ static inline void tcg_gen_helperN(void *func, int flags, int sizemask,
}
/* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently
- reserved for helpers in tcg-runtime.c. These helpers are all const
- and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST |
- TCG_CALL_PURE. This may need to be adjusted if these functions
- start to be used with other helpers. */
+ reserved for helpers in tcg-runtime.c.  None of these helpers read
+ globals or have side effects, hence the call to tcg_gen_callN() with
+ TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS.  This may need to
+ be adjusted if these functions start to be used with other helpers. */
static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
TCGv_i32 a, TCGv_i32 b)
{
@@ -408,8 +413,9 @@ static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret,
fn = tcg_const_ptr(func);
args[0] = GET_TCGV_I32(a);
args[1] = GET_TCGV_I32(b);
- tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
- GET_TCGV_I32(ret), 2, args);
+ tcg_gen_callN(&tcg_ctx, fn,
+ TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+ sizemask, GET_TCGV_I32(ret), 2, args);
tcg_temp_free_ptr(fn);
}
@@ -421,8 +427,9 @@ static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret,
fn = tcg_const_ptr(func);
args[0] = GET_TCGV_I64(a);
args[1] = GET_TCGV_I64(b);
- tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask,
- GET_TCGV_I64(ret), 2, args);
+ tcg_gen_callN(&tcg_ctx, fn,
+ TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS,
+ sizemask, GET_TCGV_I64(ret), 2, args);
tcg_temp_free_ptr(fn);
}
@@ -518,18 +525,34 @@ static inline void tcg_gen_and_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
}
}
-static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
- /* some cases can be optimized here */
- if (arg2 == 0) {
+ TCGv_i32 t0;
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
tcg_gen_movi_i32(ret, 0);
- } else if (arg2 == 0xffffffff) {
+ return;
+ case 0xffffffffu:
tcg_gen_mov_i32(ret, arg1);
- } else {
- TCGv_i32 t0 = tcg_const_i32(arg2);
- tcg_gen_and_i32(ret, arg1, t0);
- tcg_temp_free_i32(t0);
- }
+ return;
+ case 0xffu:
+ /* Don't recurse with tcg_gen_ext8u_i32. */
+ if (TCG_TARGET_HAS_ext8u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffu:
+ if (TCG_TARGET_HAS_ext16u_i32) {
+ tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+ return;
+ }
+ break;
+ }
+ t0 = tcg_const_i32(arg2);
+ tcg_gen_and_i32(ret, arg1, t0);
+ tcg_temp_free_i32(t0);
}
static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -543,9 +566,9 @@ static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- /* some cases can be optimized here */
- if (arg2 == 0xffffffff) {
- tcg_gen_movi_i32(ret, 0xffffffff);
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i32(ret, -1);
} else if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
} else {
@@ -566,9 +589,12 @@ static inline void tcg_gen_xor_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
- /* some cases can be optimized here */
+ /* Some cases can be optimized here. */
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+ /* Don't recurse with tcg_gen_not_i32. */
+ tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
} else {
TCGv_i32 t0 = tcg_const_i32(arg2);
tcg_gen_xor_i32(ret, arg1, t0);
@@ -627,29 +653,49 @@ static inline void tcg_gen_sari_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
TCGv_i32 arg2, int label_index)
{
- tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(label_index);
+ } else if (cond != TCG_COND_NEVER) {
+ tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+ }
}
static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
int32_t arg2, int label_index)
{
- TCGv_i32 t0 = tcg_const_i32(arg2);
- tcg_gen_brcond_i32(cond, arg1, t0, label_index);
- tcg_temp_free_i32(t0);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(label_index);
+ } else if (cond != TCG_COND_NEVER) {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_brcond_i32(cond, arg1, t0, label_index);
+ tcg_temp_free_i32(t0);
+ }
}
static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 arg1, TCGv_i32 arg2)
{
- tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i32(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i32(ret, 0);
+ } else {
+ tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+ }
}
static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
TCGv_i32 arg1, int32_t arg2)
{
- TCGv_i32 t0 = tcg_const_i32(arg2);
- tcg_gen_setcond_i32(cond, ret, arg1, t0);
- tcg_temp_free_i32(t0);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i32(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i32(ret, 0);
+ } else {
+ TCGv_i32 t0 = tcg_const_i32(arg2);
+ tcg_gen_setcond_i32(cond, ret, arg1, t0);
+ tcg_temp_free_i32(t0);
+ }
}
static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -847,6 +893,8 @@ static inline void tcg_gen_add_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
TCGV_HIGH(arg2));
+ /* Allow the optimizer room to replace add2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
}
static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -854,6 +902,8 @@ static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
TCGV_HIGH(arg2));
+ /* Allow the optimizer room to replace sub2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
}
static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -945,17 +995,27 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
TCGv_i64 arg2, int label_index)
{
- tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
- TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
- TCGV_HIGH(arg2), cond, label_index);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(label_index);
+ } else if (cond != TCG_COND_NEVER) {
+ tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+ TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+ TCGV_HIGH(arg2), cond, label_index);
+ }
}
static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, TCGv_i64 arg2)
{
- tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
- TCGV_LOW(arg1), TCGV_HIGH(arg1),
- TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i32(TCGV_LOW(ret), 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+ } else {
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+ TCGV_LOW(arg1), TCGV_HIGH(arg1),
+ TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+ }
tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
}
@@ -969,6 +1029,8 @@ static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
TCGV_LOW(arg1), TCGV_LOW(arg2));
+ /* Allow the optimizer room to replace mulu2 with two moves. */
+ tcg_gen_op0(INDEX_op_nop);
tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
@@ -1120,9 +1182,38 @@ static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
}
}
-static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
+ TCGv_i64 t0;
+ /* Some cases can be optimized here. */
+ switch (arg2) {
+ case 0:
+ tcg_gen_movi_i64(ret, 0);
+ return;
+ case 0xffffffffffffffffull:
+ tcg_gen_mov_i64(ret, arg1);
+ return;
+ case 0xffull:
+ /* Don't recurse with tcg_gen_ext8u_i64. */
+ if (TCG_TARGET_HAS_ext8u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffu:
+ if (TCG_TARGET_HAS_ext16u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+ return;
+ }
+ break;
+ case 0xffffffffull:
+ if (TCG_TARGET_HAS_ext32u_i64) {
+ tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+ return;
+ }
+ break;
+ }
+ t0 = tcg_const_i64(arg2);
tcg_gen_and_i64(ret, arg1, t0);
tcg_temp_free_i64(t0);
}
@@ -1138,9 +1229,16 @@ static inline void tcg_gen_or_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
- tcg_gen_or_i64(ret, arg1, t0);
- tcg_temp_free_i64(t0);
+ /* Some cases can be optimized here. */
+ if (arg2 == -1) {
+ tcg_gen_movi_i64(ret, -1);
+ } else if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_or_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
}
static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1154,9 +1252,17 @@ static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
- tcg_gen_xor_i64(ret, arg1, t0);
- tcg_temp_free_i64(t0);
+ /* Some cases can be optimized here. */
+ if (arg2 == 0) {
+ tcg_gen_mov_i64(ret, arg1);
+ } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+ /* Don't recurse with tcg_gen_not_i64. */
+ tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+ } else {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_xor_i64(ret, arg1, t0);
+ tcg_temp_free_i64(t0);
+ }
}
static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1210,13 +1316,23 @@ static inline void tcg_gen_sari_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
TCGv_i64 arg2, int label_index)
{
- tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(label_index);
+ } else if (cond != TCG_COND_NEVER) {
+ tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+ }
}
static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
TCGv_i64 arg1, TCGv_i64 arg2)
{
- tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_movi_i64(ret, 1);
+ } else if (cond == TCG_COND_NEVER) {
+ tcg_gen_movi_i64(ret, 0);
+ } else {
+ tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+ }
}
static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1334,9 +1450,13 @@ static inline void tcg_gen_subi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
int64_t arg2, int label_index)
{
- TCGv_i64 t0 = tcg_const_i64(arg2);
- tcg_gen_brcond_i64(cond, arg1, t0, label_index);
- tcg_temp_free_i64(t0);
+ if (cond == TCG_COND_ALWAYS) {
+ tcg_gen_br(label_index);
+ } else if (cond != TCG_COND_NEVER) {
+ TCGv_i64 t0 = tcg_const_i64(arg2);
+ tcg_gen_brcond_i64(cond, arg1, t0, label_index);
+ tcg_temp_free_i64(t0);
+ }
}
static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
@@ -1746,36 +1866,6 @@ static inline void tcg_gen_discard_i64(TCGv_i64 arg)
#endif
}
-static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
-{
-#if TCG_TARGET_REG_BITS == 32
- tcg_gen_mov_i32(TCGV_LOW(dest), low);
- tcg_gen_mov_i32(TCGV_HIGH(dest), high);
-#else
- TCGv_i64 tmp = tcg_temp_new_i64();
- /* This extension is only needed for type correctness.
- We may be able to do better given target specific information. */
- tcg_gen_extu_i32_i64(tmp, high);
- tcg_gen_shli_i64(tmp, tmp, 32);
- tcg_gen_extu_i32_i64(dest, low);
- tcg_gen_or_i64(dest, dest, tmp);
- tcg_temp_free_i64(tmp);
-#endif
-}
-
-static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high)
-{
-#if TCG_TARGET_REG_BITS == 32
- tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high));
-#else
- TCGv_i64 tmp = tcg_temp_new_i64();
- tcg_gen_ext32u_i64(dest, low);
- tcg_gen_shli_i64(tmp, high, 32);
- tcg_gen_or_i64(dest, dest, tmp);
- tcg_temp_free_i64(tmp);
-#endif
-}
-
static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
{
if (TCG_TARGET_HAS_andc_i32) {
@@ -2048,6 +2138,10 @@ static inline void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1,
uint32_t mask;
TCGv_i32 t1;
+ tcg_debug_assert(ofs < 32);
+ tcg_debug_assert(len <= 32);
+ tcg_debug_assert(ofs + len <= 32);
+
if (ofs == 0 && len == 32) {
tcg_gen_mov_i32(ret, arg2);
return;
@@ -2079,6 +2173,10 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
uint64_t mask;
TCGv_i64 t1;
+ tcg_debug_assert(ofs < 64);
+ tcg_debug_assert(len <= 64);
+ tcg_debug_assert(ofs + len <= 64);
+
if (ofs == 0 && len == 64) {
tcg_gen_mov_i64(ret, arg2);
return;
@@ -2118,6 +2216,102 @@ static inline void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1,
tcg_temp_free_i64(t1);
}
+static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low,
+ TCGv_i32 high)
+{
+#if TCG_TARGET_REG_BITS == 32
+ tcg_gen_mov_i32(TCGV_LOW(dest), low);
+ tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+ TCGv_i64 tmp = tcg_temp_new_i64();
+ /* These extensions are only needed for type correctness.
+ We may be able to do better given target specific information. */
+ tcg_gen_extu_i32_i64(tmp, high);
+ tcg_gen_extu_i32_i64(dest, low);
+ /* If deposit is available, use it. Otherwise use the extra
+ knowledge that we have of the zero-extensions above. */
+ if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+ tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
+ } else {
+ tcg_gen_shli_i64(tmp, tmp, 32);
+ tcg_gen_or_i64(dest, dest, tmp);
+ }
+ tcg_temp_free_i64(tmp);
+#endif
+}
+
+static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low,
+ TCGv_i64 high)
+{
+ tcg_gen_deposit_i64(dest, low, high, 32, 32);
+}
+
+static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
+ TCGv_i32 c1, TCGv_i32 c2,
+ TCGv_i32 v1, TCGv_i32 v2)
+{
+ if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_setcond_i32(cond, t0, c1, c2);
+ tcg_gen_neg_i32(t0, t0);
+ tcg_gen_and_i32(t1, v1, t0);
+ tcg_gen_andc_i32(ret, v2, t0);
+ tcg_gen_or_i32(ret, ret, t1);
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ }
+}
+
+static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
+ TCGv_i64 c1, TCGv_i64 c2,
+ TCGv_i64 v1, TCGv_i64 v2)
+{
+#if TCG_TARGET_REG_BITS == 32
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+ tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+ TCGV_LOW(c1), TCGV_HIGH(c1),
+ TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+ if (TCG_TARGET_HAS_movcond_i32) {
+ tcg_gen_movi_i32(t1, 0);
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
+ TCGV_LOW(v1), TCGV_LOW(v2));
+ tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
+ TCGV_HIGH(v1), TCGV_HIGH(v2));
+ } else {
+ tcg_gen_neg_i32(t0, t0);
+
+ tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
+ tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
+ tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+
+ tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
+ tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
+ tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+#else
+ if (TCG_TARGET_HAS_movcond_i64) {
+ tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+ } else {
+ TCGv_i64 t0 = tcg_temp_new_i64();
+ TCGv_i64 t1 = tcg_temp_new_i64();
+ tcg_gen_setcond_i64(cond, t0, c1, c2);
+ tcg_gen_neg_i64(t0, t0);
+ tcg_gen_and_i64(t1, v1, t0);
+ tcg_gen_andc_i64(ret, v2, t0);
+ tcg_gen_or_i64(ret, ret, t1);
+ tcg_temp_free_i64(t0);
+ tcg_temp_free_i64(t1);
+ }
+#endif
+}
+
/***************************************/
/* QEMU specific operations. Their type depend on the QEMU CPU
type. */
@@ -2166,8 +2360,15 @@ static inline void tcg_gen_exit_tb(tcg_target_long val)
tcg_gen_op1i(INDEX_op_exit_tb, val);
}
-static inline void tcg_gen_goto_tb(int idx)
+static inline void tcg_gen_goto_tb(unsigned idx)
{
+ /* We only support two chained exits. */
+ tcg_debug_assert(idx <= 1);
+#ifdef CONFIG_DEBUG_TCG
+ /* Verify that we haven't seen this numbered exit before. */
+ tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & (1 << idx)) == 0);
+ tcg_ctx.goto_tb_issue_mask |= 1 << idx;
+#endif
tcg_gen_op1i(INDEX_op_goto_tb, idx);
}
@@ -2434,6 +2635,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_deposit_tl tcg_gen_deposit_i64
#define tcg_const_tl tcg_const_i64
#define tcg_const_local_tl tcg_const_local_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
#else
#define tcg_gen_movi_tl tcg_gen_movi_i32
#define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2505,6 +2707,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index)
#define tcg_gen_deposit_tl tcg_gen_deposit_i32
#define tcg_const_tl tcg_const_i32
#define tcg_const_local_tl tcg_const_local_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
#endif
#if TCG_TARGET_REG_BITS == 32
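
The fallback used by tcg_gen_movcond_* above is the classic mask trick:
dest = (v1 & -(c1 cond c2)) | (v2 & ~-(c1 cond c2)).  A minimal sketch of the
same expansion on plain host integers (illustrative only, not part of the
patch):

    /* Illustrative only: what the setcond/neg/and/andc/or sequence computes
       when the target has no native movcond. */
    static uint32_t example_movcond(int cond_true, uint32_t v1, uint32_t v2)
    {
        uint32_t mask = -(uint32_t)(cond_true != 0);  /* 0 or 0xffffffff */
        return (v1 & mask) | (v2 & ~mask);            /* cond_true ? v1 : v2 */
    }
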
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 8e06d03..9651063 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -36,10 +36,9 @@ DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */
DEF(discard, 1, 0, 0, 0)
-DEF(set_label, 0, 0, 1, 0)
-DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
-DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(set_label, 0, 0, 1, TCG_OPF_BB_END)
+DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER) /* variable number of parameters */
+DEF(br, 0, 0, 1, TCG_OPF_BB_END)
#define IMPL(X) (X ? 0 : TCG_OPF_NOT_PRESENT)
#if TCG_TARGET_REG_BITS == 32
@@ -51,15 +50,16 @@ DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
DEF(mov_i32, 1, 1, 0, 0)
DEF(movi_i32, 1, 0, 1, 0)
DEF(setcond_i32, 1, 2, 1, 0)
+DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
/* load/store */
DEF(ld8u_i32, 1, 1, 1, 0)
DEF(ld8s_i32, 1, 1, 1, 0)
DEF(ld16u_i32, 1, 1, 1, 0)
DEF(ld16s_i32, 1, 1, 1, 0)
DEF(ld_i32, 1, 1, 1, 0)
-DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
-DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS)
+DEF(st8_i32, 0, 2, 1, 0)
+DEF(st16_i32, 0, 2, 1, 0)
+DEF(st_i32, 0, 2, 1, 0)
/* arith */
DEF(add_i32, 1, 2, 0, 0)
DEF(sub_i32, 1, 2, 0, 0)
@@ -81,12 +81,11 @@ DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32))
DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32))
-DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END)
DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32))
-DEF(brcond2_i32, 0, 4, 2,
- TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | IMPL(TCG_TARGET_REG_BITS == 32))
+DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32))
DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32))
DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32))
@@ -107,6 +106,7 @@ DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
DEF(mov_i64, 1, 1, 0, IMPL64)
DEF(movi_i64, 1, 0, 1, IMPL64)
DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
/* load/store */
DEF(ld8u_i64, 1, 1, 1, IMPL64)
DEF(ld8s_i64, 1, 1, 1, IMPL64)
@@ -115,10 +115,10 @@ DEF(ld16s_i64, 1, 1, 1, IMPL64)
DEF(ld32u_i64, 1, 1, 1, IMPL64)
DEF(ld32s_i64, 1, 1, 1, IMPL64)
DEF(ld_i64, 1, 1, 1, IMPL64)
-DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
-DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64)
+DEF(st8_i64, 0, 2, 1, IMPL64)
+DEF(st16_i64, 0, 2, 1, IMPL64)
+DEF(st32_i64, 0, 2, 1, IMPL64)
+DEF(st_i64, 0, 2, 1, IMPL64)
/* arith */
DEF(add_i64, 1, 2, 0, IMPL64)
DEF(sub_i64, 1, 2, 0, IMPL64)
@@ -140,7 +140,7 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64))
DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64))
-DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | IMPL64)
+DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64)
DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64))
DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64))
DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64))
@@ -164,8 +164,8 @@ DEF(debug_insn_start, 0, 0, 2, 0)
#else
DEF(debug_insn_start, 0, 0, 1, 0)
#endif
-DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
-DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
+DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END)
+DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END)
/* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op
constants must be defined */
#if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8386b70..cb193f2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -62,10 +62,6 @@
#include "elf.h"
-#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
-#error GUEST_BASE not supported on this host.
-#endif
-
/* Forward declarations for functions declared in tcg-target.c and used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
@@ -89,7 +85,6 @@ static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
tcg_target_long arg2);
static int tcg_target_const_match(tcg_target_long val,
const TCGArgConstraint *arg_ct);
-static int tcg_target_get_call_iarg_regs_count(int flags);
TCGOpDef tcg_op_defs[] = {
#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
@@ -101,10 +96,6 @@ const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;
-/* XXX: move that inside the context */
-uint16_t *gen_opc_ptr;
-TCGArg *gen_opparam_ptr;
-
static inline void tcg_out8(TCGContext *s, uint8_t v)
{
*s->code_ptr++ = v;
@@ -243,7 +234,6 @@ void tcg_context_init(TCGContext *s)
int *sorted_args;
memset(s, 0, sizeof(*s));
- s->temps = s->static_temps;
s->nb_globals = 0;
/* Count total number of arguments and allocate the corresponding
@@ -299,8 +289,20 @@ void tcg_func_start(TCGContext *s)
s->nb_labels = 0;
s->current_frame_offset = s->frame_start;
- gen_opc_ptr = gen_opc_buf;
- gen_opparam_ptr = gen_opparam_buf;
+#ifdef CONFIG_DEBUG_TCG
+ s->goto_tb_issue_mask = 0;
+#endif
+
+ s->gen_opc_ptr = s->gen_opc_buf;
+ s->gen_opparam_ptr = s->gen_opparam_buf;
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+ /* Initialize qemu_ld/st labels to assist code generation at the end of
+ the TB for TLB miss cases */
+ s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) *
+ TCG_MAX_QEMU_LDST);
+ s->nb_qemu_ldst_labels = 0;
+#endif
}
static inline void tcg_temp_alloc(TCGContext *s, int n)
@@ -635,23 +637,23 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
}
#endif /* TCG_TARGET_EXTEND_ARGS */
- *gen_opc_ptr++ = INDEX_op_call;
- nparam = gen_opparam_ptr++;
+ *s->gen_opc_ptr++ = INDEX_op_call;
+ nparam = s->gen_opparam_ptr++;
if (ret != TCG_CALL_DUMMY_ARG) {
#if TCG_TARGET_REG_BITS < 64
if (sizemask & 1) {
#ifdef TCG_TARGET_WORDS_BIGENDIAN
- *gen_opparam_ptr++ = ret + 1;
- *gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret + 1;
+ *s->gen_opparam_ptr++ = ret;
#else
- *gen_opparam_ptr++ = ret;
- *gen_opparam_ptr++ = ret + 1;
+ *s->gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret + 1;
#endif
nb_rets = 2;
} else
#endif
{
- *gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret;
nb_rets = 1;
}
} else {
@@ -665,7 +667,7 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
/* some targets want aligned 64 bit args */
if (real_args & 1) {
- *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+ *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
real_args++;
}
#endif
@@ -680,28 +682,28 @@ void tcg_gen_callN(TCGContext *s, TCGv_ptr func, unsigned int flags,
have to get more complicated to differentiate between
stack arguments and register arguments. */
#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
- *gen_opparam_ptr++ = args[i] + 1;
- *gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i] + 1;
+ *s->gen_opparam_ptr++ = args[i];
#else
- *gen_opparam_ptr++ = args[i];
- *gen_opparam_ptr++ = args[i] + 1;
+ *s->gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i] + 1;
#endif
real_args += 2;
continue;
}
#endif /* TCG_TARGET_REG_BITS < 64 */
- *gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i];
real_args++;
}
- *gen_opparam_ptr++ = GET_TCGV_PTR(func);
+ *s->gen_opparam_ptr++ = GET_TCGV_PTR(func);
- *gen_opparam_ptr++ = flags;
+ *s->gen_opparam_ptr++ = flags;
*nparam = (nb_rets << 16) | (real_args + 1);
/* total parameters, needed to go backward in the instruction stream */
- *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+ *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
for (i = 0; i < nargs; ++i) {
@@ -778,7 +780,11 @@ static void tcg_reg_alloc_start(TCGContext *s)
}
for(i = s->nb_globals; i < s->nb_temps; i++) {
ts = &s->temps[i];
- ts->val_type = TEMP_VAL_DEAD;
+ if (ts->temp_local) {
+ ts->val_type = TEMP_VAL_MEM;
+ } else {
+ ts->val_type = TEMP_VAL_DEAD;
+ }
ts->mem_allocated = 0;
ts->fixed_reg = 0;
}
@@ -861,6 +867,8 @@ static TCGHelperInfo *tcg_find_helper(TCGContext *s, tcg_target_ulong val)
static const char * const cond_name[] =
{
+ [TCG_COND_NEVER] = "never",
+ [TCG_COND_ALWAYS] = "always",
[TCG_COND_EQ] = "eq",
[TCG_COND_NE] = "ne",
[TCG_COND_LT] = "lt",
@@ -884,9 +892,9 @@ void tcg_dump_ops(TCGContext *s)
char buf[128];
first_insn = 1;
- opc_ptr = gen_opc_buf;
- args = gen_opparam_buf;
- while (opc_ptr < gen_opc_ptr) {
+ opc_ptr = s->gen_opc_buf;
+ args = s->gen_opparam_buf;
+ while (opc_ptr < s->gen_opc_ptr) {
c = *opc_ptr++;
def = &tcg_op_defs[c];
if (c == INDEX_op_debug_insn_start) {
@@ -937,11 +945,7 @@ void tcg_dump_ops(TCGContext *s)
args[nb_oargs + i]));
}
}
- } else if (c == INDEX_op_movi_i32
-#if TCG_TARGET_REG_BITS == 64
- || c == INDEX_op_movi_i64
-#endif
- ) {
+ } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
tcg_target_ulong val;
TCGHelperInfo *th;
@@ -991,17 +995,13 @@ void tcg_dump_ops(TCGContext *s)
}
switch (c) {
case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 32
- case INDEX_op_brcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
- case INDEX_op_brcond_i64:
-#endif
case INDEX_op_setcond_i32:
-#if TCG_TARGET_REG_BITS == 32
+ case INDEX_op_movcond_i32:
+ case INDEX_op_brcond2_i32:
case INDEX_op_setcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
+ case INDEX_op_brcond_i64:
case INDEX_op_setcond_i64:
-#endif
+ case INDEX_op_movcond_i64:
if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
qemu_log(",%s", cond_name[args[k++]]);
} else {
@@ -1188,31 +1188,27 @@ static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
}
}
-/* liveness analysis: end of function: globals are live, temps are
- dead. */
-/* XXX: at this stage, not used as there would be little gains because
- most TBs end with a conditional jump. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps)
+/* liveness analysis: end of function: all temps are dead, and globals
+ should be in memory. */
+static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
+ uint8_t *mem_temps)
{
- memset(dead_temps, 0, s->nb_globals);
- memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals);
+ memset(dead_temps, 1, s->nb_temps);
+ memset(mem_temps, 1, s->nb_globals);
+ memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
}
-/* liveness analysis: end of basic block: globals are live, temps are
- dead, local temps are live. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps)
+/* liveness analysis: end of basic block: all temps are dead, globals
+ and local temps should be in memory. */
+static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
+ uint8_t *mem_temps)
{
int i;
- TCGTemp *ts;
- memset(dead_temps, 0, s->nb_globals);
- ts = &s->temps[s->nb_globals];
+ memset(dead_temps, 1, s->nb_temps);
+ memset(mem_temps, 1, s->nb_globals);
for(i = s->nb_globals; i < s->nb_temps; i++) {
- if (ts->temp_local)
- dead_temps[i] = 0;
- else
- dead_temps[i] = 1;
- ts++;
+ mem_temps[i] = s->temps[i].temp_local;
}
}
@@ -1225,22 +1221,25 @@ static void tcg_liveness_analysis(TCGContext *s)
TCGOpcode op;
TCGArg *args;
const TCGOpDef *def;
- uint8_t *dead_temps;
- unsigned int dead_args;
+ uint8_t *dead_temps, *mem_temps;
+ uint16_t dead_args;
+ uint8_t sync_args;
- gen_opc_ptr++; /* skip end */
+ s->gen_opc_ptr++; /* skip end */
- nb_ops = gen_opc_ptr - gen_opc_buf;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+ s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
dead_temps = tcg_malloc(s->nb_temps);
- memset(dead_temps, 1, s->nb_temps);
+ mem_temps = tcg_malloc(s->nb_temps);
+ tcg_la_func_end(s, dead_temps, mem_temps);
- args = gen_opparam_ptr;
+ args = s->gen_opparam_ptr;
op_index = nb_ops - 1;
while (op_index >= 0) {
- op = gen_opc_buf[op_index];
+ op = s->gen_opc_buf[op_index];
def = &tcg_op_defs[op];
switch(op) {
case INDEX_op_call:
@@ -1256,30 +1255,41 @@ static void tcg_liveness_analysis(TCGContext *s)
/* pure functions can be removed if their result is not
used */
- if (call_flags & TCG_CALL_PURE) {
+ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for(i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (!dead_temps[arg])
+ if (!dead_temps[arg] || mem_temps[arg]) {
goto do_not_remove_call;
+ }
}
- tcg_set_nop(s, gen_opc_buf + op_index,
+ tcg_set_nop(s, s->gen_opc_buf + op_index,
args - 1, nb_args);
} else {
do_not_remove_call:
/* output args are dead */
dead_args = 0;
+ sync_args = 0;
for(i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
}
+ if (mem_temps[arg]) {
+ sync_args |= (1 << i);
+ }
dead_temps[arg] = 1;
+ mem_temps[arg] = 0;
}
-
- if (!(call_flags & TCG_CALL_CONST)) {
- /* globals are live (they may be used by the call) */
- memset(dead_temps, 0, s->nb_globals);
+
+ if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ /* globals should be synced to memory */
+ memset(mem_temps, 1, s->nb_globals);
+ }
+ if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+ TCG_CALL_NO_READ_GLOBALS))) {
+ /* globals should go back to memory */
+ memset(dead_temps, 1, s->nb_globals);
}
/* input args are live */
@@ -1293,15 +1303,11 @@ static void tcg_liveness_analysis(TCGContext *s)
}
}
s->op_dead_args[op_index] = dead_args;
+ s->op_sync_args[op_index] = sync_args;
}
args--;
}
break;
- case INDEX_op_set_label:
- args--;
- /* mark end of basic block */
- tcg_la_bb_end(s, dead_temps);
- break;
case INDEX_op_debug_insn_start:
args -= def->nb_args;
break;
@@ -1313,11 +1319,62 @@ static void tcg_liveness_analysis(TCGContext *s)
args--;
/* mark the temporary as dead */
dead_temps[args[0]] = 1;
+ mem_temps[args[0]] = 0;
break;
case INDEX_op_end:
break;
- /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+ case INDEX_op_add2_i32:
+ case INDEX_op_sub2_i32:
+ args -= 6;
+ nb_iargs = 4;
+ nb_oargs = 2;
+ /* Test if the high part of the operation is dead, but not
+ the low part.  The result can be optimized to a simple
+ add or sub.  This happens often for an x86_64 guest when
+ the cpu mode is set to 32 bit. */
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ goto do_remove;
+ }
+ /* Create the single operation plus nop. */
+ if (op == INDEX_op_add2_i32) {
+ op = INDEX_op_add_i32;
+ } else {
+ op = INDEX_op_sub_i32;
+ }
+ s->gen_opc_buf[op_index] = op;
+ args[1] = args[2];
+ args[2] = args[4];
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
+ /* Fall through and mark the single-word operation live. */
+ nb_iargs = 2;
+ nb_oargs = 1;
+ }
+ goto do_not_remove;
+
+ case INDEX_op_mulu2_i32:
+ args -= 4;
+ nb_iargs = 2;
+ nb_oargs = 2;
+            /* Likewise, test whether the high part of the operation is dead. */
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
+ goto do_remove;
+ }
+ s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+ args[1] = args[2];
+ args[2] = args[3];
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
+ /* Fall through and mark the single-word operation live. */
+ nb_oargs = 1;
+ }
+ goto do_not_remove;
+
default:
+ /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
args -= def->nb_args;
nb_iargs = def->nb_iargs;
nb_oargs = def->nb_oargs;
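
The add2/sub2/mulu2 narrowing above rests on a simple arithmetic fact: if only the low word of a double-word result is consumed, the carry into (or the upper word of) the result cannot influence it. A small standalone check of that fact (not QEMU code, arbitrary operand values):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t al = 0xfffffff0u, bl = 0x00000020u;   /* low words */
    uint32_t ah = 0x12345678u, bh = 0x9abcdef0u;   /* high words, about to be ignored */

    /* add2_i32: full double-word sum vs. the single add_i32 rewrite. */
    uint64_t sum = (((uint64_t)ah << 32) | al) + (((uint64_t)bh << 32) | bl);
    assert((uint32_t)sum == al + bl);

    /* mulu2_i32: full 32x32->64 product vs. the single mul_i32 rewrite. */
    uint64_t prod = (uint64_t)al * bl;
    assert((uint32_t)prod == al * bl);

    return 0;
}
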
@@ -1328,10 +1385,12 @@ static void tcg_liveness_analysis(TCGContext *s)
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for(i = 0; i < nb_oargs; i++) {
arg = args[i];
- if (!dead_temps[arg])
+ if (!dead_temps[arg] || mem_temps[arg]) {
goto do_not_remove;
+ }
}
- tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
+ do_remove:
+ tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
@@ -1340,20 +1399,25 @@ static void tcg_liveness_analysis(TCGContext *s)
/* output args are dead */
dead_args = 0;
+ sync_args = 0;
for(i = 0; i < nb_oargs; i++) {
arg = args[i];
if (dead_temps[arg]) {
dead_args |= (1 << i);
}
+ if (mem_temps[arg]) {
+ sync_args |= (1 << i);
+ }
dead_temps[arg] = 1;
+ mem_temps[arg] = 0;
}
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
- tcg_la_bb_end(s, dead_temps);
- } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
- /* globals are live */
- memset(dead_temps, 0, s->nb_globals);
+ tcg_la_bb_end(s, dead_temps, mem_temps);
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* globals should be synced to memory */
+ memset(mem_temps, 1, s->nb_globals);
}
/* input args are live */
@@ -1365,24 +1429,28 @@ static void tcg_liveness_analysis(TCGContext *s)
dead_temps[arg] = 0;
}
s->op_dead_args[op_index] = dead_args;
+ s->op_sync_args[op_index] = sync_args;
}
break;
}
op_index--;
}
- if (args != gen_opparam_buf)
+ if (args != s->gen_opparam_buf) {
tcg_abort();
+ }
}
#else
/* dummy liveness analysis */
static void tcg_liveness_analysis(TCGContext *s)
{
int nb_ops;
- nb_ops = gen_opc_ptr - gen_opc_buf;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
+ s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
+ memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
}
#endif
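
op_dead_args and op_sync_args are per-op bitmasks indexed by argument position, outputs first: bit i of op_dead_args means argument i dies at this op, and bit i of op_sync_args means output argument i must be written back to its canonical memory slot. A standalone sketch using the same shift-and-test as the IS_DEAD_ARG()/NEED_SYNC_ARG() macros defined further down (the op and the bit values are hypothetical):

#include <assert.h>
#include <stdint.h>

#define IS_DEAD_ARG(n)   ((dead_args >> (n)) & 1)
#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)

int main(void)
{
    /* Hypothetical op with one output (arg 0) and two inputs (args 1, 2):
       the output must be synced to memory, and input arg 2 dies here. */
    uint16_t dead_args = 1u << 2;
    uint8_t  sync_args = 1u << 0;

    assert(!IS_DEAD_ARG(0) && NEED_SYNC_ARG(0));
    assert(!IS_DEAD_ARG(1) && !NEED_SYNC_ARG(1));
    assert(IS_DEAD_ARG(2));
    return 0;
}
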
@@ -1463,7 +1531,8 @@ static void temp_allocate_frame(TCGContext *s, int temp)
{
TCGTemp *ts;
ts = &s->temps[temp];
-#ifndef __sparc_v9__ /* Sparc64 stack is accessed with offset of 2047 */
+#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
+ /* Sparc64 stack is accessed with offset of 2047 */
s->current_frame_offset = (s->current_frame_offset +
(tcg_target_long)sizeof(tcg_target_long) - 1) &
~(sizeof(tcg_target_long) - 1);
@@ -1478,22 +1547,33 @@ static void temp_allocate_frame(TCGContext *s, int temp)
s->current_frame_offset += (tcg_target_long)sizeof(tcg_target_long);
}
+/* sync register 'reg' by saving it to the corresponding temporary */
+static inline void tcg_reg_sync(TCGContext *s, int reg)
+{
+ TCGTemp *ts;
+ int temp;
+
+ temp = s->reg_to_temp[reg];
+ ts = &s->temps[temp];
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (!ts->mem_coherent && !ts->fixed_reg) {
+ if (!ts->mem_allocated) {
+ temp_allocate_frame(s, temp);
+ }
+ tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
+ }
+ ts->mem_coherent = 1;
+}
+
/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, int reg)
{
- TCGTemp *ts;
int temp;
temp = s->reg_to_temp[reg];
if (temp != -1) {
- ts = &s->temps[temp];
- assert(ts->val_type == TEMP_VAL_REG);
- if (!ts->mem_coherent) {
- if (!ts->mem_allocated)
- temp_allocate_frame(s, temp);
- tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
- }
- ts->val_type = TEMP_VAL_MEM;
+ tcg_reg_sync(s, reg);
+ s->temps[temp].val_type = TEMP_VAL_MEM;
s->reg_to_temp[reg] = -1;
}
}
@@ -1525,31 +1605,45 @@ static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
tcg_abort();
}
-/* save a temporary to memory. 'allocated_regs' is used in case a
+/* mark a temporary as dead. */
+static inline void temp_dead(TCGContext *s, int temp)
+{
+ TCGTemp *ts;
+
+ ts = &s->temps[temp];
+ if (!ts->fixed_reg) {
+ if (ts->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ts->reg] = -1;
+ }
+ if (temp < s->nb_globals || ts->temp_local) {
+ ts->val_type = TEMP_VAL_MEM;
+ } else {
+ ts->val_type = TEMP_VAL_DEAD;
+ }
+ }
+}
+
+/* sync a temporary to memory. 'allocated_regs' is used in case a
temporary registers needs to be allocated to store a constant. */
-static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
{
TCGTemp *ts;
- int reg;
ts = &s->temps[temp];
if (!ts->fixed_reg) {
switch(ts->val_type) {
+ case TEMP_VAL_CONST:
+ ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
+ allocated_regs);
+ ts->val_type = TEMP_VAL_REG;
+ s->reg_to_temp[ts->reg] = temp;
+ ts->mem_coherent = 0;
+ tcg_out_movi(s, ts->type, ts->reg, ts->val);
+            /* fallthrough */
case TEMP_VAL_REG:
- tcg_reg_free(s, ts->reg);
+ tcg_reg_sync(s, ts->reg);
break;
case TEMP_VAL_DEAD:
- ts->val_type = TEMP_VAL_MEM;
- break;
- case TEMP_VAL_CONST:
- reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
- allocated_regs);
- if (!ts->mem_allocated)
- temp_allocate_frame(s, temp);
- tcg_out_movi(s, ts->type, reg, ts->val);
- tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
- ts->val_type = TEMP_VAL_MEM;
- break;
case TEMP_VAL_MEM:
break;
default:
@@ -1558,6 +1652,20 @@ static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
}
}
+/* save a temporary to memory. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
+{
+#ifdef USE_LIVENESS_ANALYSIS
+ /* The liveness analysis already ensures that globals are back
+ in memory. Keep an assert for safety. */
+ assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
+#else
+ temp_sync(s, temp, allocated_regs);
+ temp_dead(s, temp);
+#endif
+}
+
/* save globals to their canonical location and assume they can be
modified be the following code. 'allocated_regs' is used in case a
temporary registers needs to be allocated to store a constant. */
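
temp_dead and temp_save above encode the value-state policy when a temporary stops being used: globals and local temps fall back to their canonical memory slot, plain temps simply stop existing (fixed-register temps are left untouched). A tiny standalone model of that decision (not QEMU code; the enum is a local stand-in and state_after_death is a made-up name):

#include <assert.h>
#include <stdbool.h>

enum { TEMP_VAL_DEAD, TEMP_VAL_REG, TEMP_VAL_MEM, TEMP_VAL_CONST };

/* Same decision as temp_dead(): which state a non-fixed temporary ends
   up in once it is no longer live. */
static int state_after_death(bool is_global, bool is_local)
{
    return (is_global || is_local) ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
}

int main(void)
{
    assert(state_after_death(true,  false) == TEMP_VAL_MEM);   /* global */
    assert(state_after_death(false, true)  == TEMP_VAL_MEM);   /* local temp */
    assert(state_after_death(false, false) == TEMP_VAL_DEAD);  /* plain temp */
    return 0;
}
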
@@ -1570,6 +1678,23 @@ static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
}
}
+/* sync globals to their canonical location and assume they can be
+ read by the following code. 'allocated_regs' is used in case a
+   temporary register needs to be allocated to store a constant. */
+static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i;
+
+ for (i = 0; i < s->nb_globals; i++) {
+#ifdef USE_LIVENESS_ANALYSIS
+ assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
+ s->temps[i].mem_coherent);
+#else
+ temp_sync(s, i, allocated_regs);
+#endif
+ }
+}
+
/* at the end of a basic block, we assume all temporaries are dead and
all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
@@ -1582,10 +1707,13 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
if (ts->temp_local) {
temp_save(s, i, allocated_regs);
} else {
- if (ts->val_type == TEMP_VAL_REG) {
- s->reg_to_temp[ts->reg] = -1;
- }
- ts->val_type = TEMP_VAL_DEAD;
+#ifdef USE_LIVENESS_ANALYSIS
+ /* The liveness analysis already ensures that temps are dead.
+ Keep an assert for safety. */
+ assert(ts->val_type == TEMP_VAL_DEAD);
+#else
+ temp_dead(s, i);
+#endif
}
}
@@ -1593,8 +1721,10 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
}
#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
+#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
-static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
+static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
+ uint16_t dead_args, uint8_t sync_args)
{
TCGTemp *ots;
tcg_target_ulong val;
@@ -1613,71 +1743,99 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
ots->val_type = TEMP_VAL_CONST;
ots->val = val;
}
+ if (NEED_SYNC_ARG(0)) {
+ temp_sync(s, args[0], s->reserved_regs);
+ }
+ if (IS_DEAD_ARG(0)) {
+ temp_dead(s, args[0]);
+ }
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
- const TCGArg *args,
- unsigned int dead_args)
+ const TCGArg *args, uint16_t dead_args,
+ uint8_t sync_args)
{
+ TCGRegSet allocated_regs;
TCGTemp *ts, *ots;
- int reg;
- const TCGArgConstraint *arg_ct;
+ const TCGArgConstraint *arg_ct, *oarg_ct;
+ tcg_regset_set(allocated_regs, s->reserved_regs);
ots = &s->temps[args[0]];
ts = &s->temps[args[1]];
- arg_ct = &def->args_ct[0];
+ oarg_ct = &def->args_ct[0];
+ arg_ct = &def->args_ct[1];
+
+ /* If the source value is not in a register, and we're going to be
+ forced to have it in a register in order to perform the copy,
+ then copy the SOURCE value into its own register first. That way
+ we don't have to reload SOURCE the next time it is used. */
+ if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
+ || ts->val_type == TEMP_VAL_MEM) {
+ ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
+ if (ts->val_type == TEMP_VAL_MEM) {
+ tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset);
+ ts->mem_coherent = 1;
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ tcg_out_movi(s, ts->type, ts->reg, ts->val);
+ }
+ s->reg_to_temp[ts->reg] = args[1];
+ ts->val_type = TEMP_VAL_REG;
+ }
- /* XXX: always mark arg dead if IS_DEAD_ARG(1) */
- if (ts->val_type == TEMP_VAL_REG) {
+ if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
+ /* mov to a non-saved dead register makes no sense (even with
+ liveness analysis disabled). */
+ assert(NEED_SYNC_ARG(0));
+ /* The code above should have moved the temp to a register. */
+ assert(ts->val_type == TEMP_VAL_REG);
+ if (!ots->mem_allocated) {
+ temp_allocate_frame(s, args[0]);
+ }
+ tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset);
+ if (IS_DEAD_ARG(1)) {
+ temp_dead(s, args[1]);
+ }
+ temp_dead(s, args[0]);
+ } else if (ts->val_type == TEMP_VAL_CONST) {
+ /* propagate constant */
+ if (ots->val_type == TEMP_VAL_REG) {
+ s->reg_to_temp[ots->reg] = -1;
+ }
+ ots->val_type = TEMP_VAL_CONST;
+ ots->val = ts->val;
+ } else {
+ /* The code in the first if block should have moved the
+ temp to a register. */
+ assert(ts->val_type == TEMP_VAL_REG);
if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
/* the mov can be suppressed */
- if (ots->val_type == TEMP_VAL_REG)
- s->reg_to_temp[ots->reg] = -1;
- reg = ts->reg;
- s->reg_to_temp[reg] = -1;
- ts->val_type = TEMP_VAL_DEAD;
- } else {
if (ots->val_type == TEMP_VAL_REG) {
- reg = ots->reg;
- } else {
- reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
- }
- if (ts->reg != reg) {
- tcg_out_mov(s, ots->type, reg, ts->reg);
+ s->reg_to_temp[ots->reg] = -1;
}
- }
- } else if (ts->val_type == TEMP_VAL_MEM) {
- if (ots->val_type == TEMP_VAL_REG) {
- reg = ots->reg;
+ ots->reg = ts->reg;
+ temp_dead(s, args[1]);
} else {
- reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs);
+ if (ots->val_type != TEMP_VAL_REG) {
+ /* When allocating a new register, make sure to not spill the
+ input one. */
+ tcg_regset_set_reg(allocated_regs, ts->reg);
+ ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs);
+ }
+ tcg_out_mov(s, ots->type, ots->reg, ts->reg);
}
- tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
- } else if (ts->val_type == TEMP_VAL_CONST) {
- if (ots->fixed_reg) {
- reg = ots->reg;
- tcg_out_movi(s, ots->type, reg, ts->val);
- } else {
- /* propagate constant */
- if (ots->val_type == TEMP_VAL_REG)
- s->reg_to_temp[ots->reg] = -1;
- ots->val_type = TEMP_VAL_CONST;
- ots->val = ts->val;
- return;
+ ots->val_type = TEMP_VAL_REG;
+ ots->mem_coherent = 0;
+ s->reg_to_temp[ots->reg] = args[0];
+ if (NEED_SYNC_ARG(0)) {
+ tcg_reg_sync(s, ots->reg);
}
- } else {
- tcg_abort();
}
- s->reg_to_temp[reg] = args[0];
- ots->reg = reg;
- ots->val_type = TEMP_VAL_REG;
- ots->mem_coherent = 0;
}
static void tcg_reg_alloc_op(TCGContext *s,
const TCGOpDef *def, TCGOpcode opc,
- const TCGArg *args,
- unsigned int dead_args)
+ const TCGArg *args, uint16_t dead_args,
+ uint8_t sync_args)
{
TCGRegSet allocated_regs;
int i, k, nb_iargs, nb_oargs, reg;
@@ -1757,22 +1915,16 @@ static void tcg_reg_alloc_op(TCGContext *s,
iarg_end: ;
}
+ /* mark dead temporaries and free the associated registers */
+ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, args[i]);
+ }
+ }
+
if (def->flags & TCG_OPF_BB_END) {
tcg_reg_alloc_bb_end(s, allocated_regs);
} else {
- /* mark dead temporaries and free the associated registers */
- for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
- arg = args[i];
- if (IS_DEAD_ARG(i)) {
- ts = &s->temps[arg];
- if (!ts->fixed_reg) {
- if (ts->val_type == TEMP_VAL_REG)
- s->reg_to_temp[ts->reg] = -1;
- ts->val_type = TEMP_VAL_DEAD;
- }
- }
- }
-
if (def->flags & TCG_OPF_CALL_CLOBBER) {
/* XXX: permit generic clobber register list ? */
for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
@@ -1780,12 +1932,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
tcg_reg_free(s, reg);
}
}
- /* XXX: for load/store we could do that only for the slow path
- (i.e. when a memory callback is called) */
-
- /* store globals and free associated registers (we assume the insn
- can modify any global. */
- save_globals(s, allocated_regs);
+ }
+ if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* sync globals if the op has side effects and might trigger
+ an exception. */
+ sync_globals(s, allocated_regs);
}
/* satisfy the output constraints */
@@ -1809,18 +1960,15 @@ static void tcg_reg_alloc_op(TCGContext *s,
tcg_regset_set_reg(allocated_regs, reg);
/* if a fixed register is used, then a move will be done afterwards */
if (!ts->fixed_reg) {
- if (ts->val_type == TEMP_VAL_REG)
+ if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = -1;
- if (IS_DEAD_ARG(i)) {
- ts->val_type = TEMP_VAL_DEAD;
- } else {
- ts->val_type = TEMP_VAL_REG;
- ts->reg = reg;
- /* temp value is modified, so the value kept in memory is
- potentially not the same */
- ts->mem_coherent = 0;
- s->reg_to_temp[reg] = arg;
- }
+ }
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ /* temp value is modified, so the value kept in memory is
+ potentially not the same */
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
}
oarg_end:
new_args[i] = reg;
@@ -1837,6 +1985,12 @@ static void tcg_reg_alloc_op(TCGContext *s,
if (ts->fixed_reg && ts->reg != reg) {
tcg_out_mov(s, ts->type, ts->reg, reg);
}
+ if (NEED_SYNC_ARG(i)) {
+ tcg_reg_sync(s, reg);
+ }
+ if (IS_DEAD_ARG(i)) {
+ temp_dead(s, args[i]);
+ }
}
}
@@ -1848,7 +2002,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
TCGOpcode opc, const TCGArg *args,
- unsigned int dead_args)
+ uint16_t dead_args, uint8_t sync_args)
{
int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
TCGArg arg, func_arg;
@@ -1866,7 +2020,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
flags = args[nb_oargs + nb_iargs];
- nb_regs = tcg_target_get_call_iarg_regs_count(flags);
+ nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
if (nb_regs > nb_params)
nb_regs = nb_params;
@@ -1972,14 +2126,8 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
/* mark dead temporaries and free the associated registers */
for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
- arg = args[i];
if (IS_DEAD_ARG(i)) {
- ts = &s->temps[arg];
- if (!ts->fixed_reg) {
- if (ts->val_type == TEMP_VAL_REG)
- s->reg_to_temp[ts->reg] = -1;
- ts->val_type = TEMP_VAL_DEAD;
- }
+ temp_dead(s, args[i]);
}
}
@@ -1989,10 +2137,14 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
tcg_reg_free(s, reg);
}
}
-
- /* store globals and free associated registers (we assume the call
- can modify any global. */
- if (!(flags & TCG_CALL_CONST)) {
+
+ /* Save globals if they might be written by the helper, sync them if
+ they might be read. */
+ if (flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ sync_globals(s, allocated_regs);
+ } else {
save_globals(s, allocated_regs);
}
@@ -2009,15 +2161,18 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
tcg_out_mov(s, ts->type, ts->reg, reg);
}
} else {
- if (ts->val_type == TEMP_VAL_REG)
+ if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = -1;
+ }
+ ts->val_type = TEMP_VAL_REG;
+ ts->reg = reg;
+ ts->mem_coherent = 0;
+ s->reg_to_temp[reg] = arg;
+ if (NEED_SYNC_ARG(i)) {
+ tcg_reg_sync(s, reg);
+ }
if (IS_DEAD_ARG(i)) {
- ts->val_type = TEMP_VAL_DEAD;
- } else {
- ts->val_type = TEMP_VAL_REG;
- ts->reg = reg;
- ts->mem_coherent = 0;
- s->reg_to_temp[reg] = arg;
+ temp_dead(s, args[i]);
}
}
}
@@ -2048,7 +2203,6 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
TCGOpcode opc;
int op_index;
const TCGOpDef *def;
- unsigned int dead_args;
const TCGArg *args;
#ifdef DEBUG_DISAS
@@ -2059,22 +2213,29 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
}
#endif
+#ifdef CONFIG_PROFILER
+ s->opt_time -= profile_getclock();
+#endif
+
#ifdef USE_TCG_OPTIMIZATIONS
- gen_opparam_ptr =
- tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
+ s->gen_opparam_ptr =
+ tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
#endif
#ifdef CONFIG_PROFILER
+ s->opt_time += profile_getclock();
s->la_time -= profile_getclock();
#endif
+
tcg_liveness_analysis(s);
+
#ifdef CONFIG_PROFILER
s->la_time += profile_getclock();
#endif
#ifdef DEBUG_DISAS
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
- qemu_log("OP after liveness analysis:\n");
+ qemu_log("OP after optimization and liveness analysis:\n");
tcg_dump_ops(s);
qemu_log("\n");
}
@@ -2085,11 +2246,11 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
s->code_buf = gen_code_buf;
s->code_ptr = gen_code_buf;
- args = gen_opparam_buf;
+ args = s->gen_opparam_buf;
op_index = 0;
for(;;) {
- opc = gen_opc_buf[op_index];
+ opc = s->gen_opc_buf[op_index];
#ifdef CONFIG_PROFILER
tcg_table_op_count[opc]++;
#endif
@@ -2101,17 +2262,14 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
#endif
switch(opc) {
case INDEX_op_mov_i32:
-#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
-#endif
- dead_args = s->op_dead_args[op_index];
- tcg_reg_alloc_mov(s, def, args, dead_args);
+ tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
+ s->op_sync_args[op_index]);
break;
case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
case INDEX_op_movi_i64:
-#endif
- tcg_reg_alloc_movi(s, args);
+ tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
+ s->op_sync_args[op_index]);
break;
case INDEX_op_debug_insn_start:
/* debug instruction */
@@ -2125,24 +2283,16 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
args += args[0];
goto next;
case INDEX_op_discard:
- {
- TCGTemp *ts;
- ts = &s->temps[args[0]];
- /* mark the temporary as dead */
- if (!ts->fixed_reg) {
- if (ts->val_type == TEMP_VAL_REG)
- s->reg_to_temp[ts->reg] = -1;
- ts->val_type = TEMP_VAL_DEAD;
- }
- }
+ temp_dead(s, args[0]);
break;
case INDEX_op_set_label:
tcg_reg_alloc_bb_end(s, s->reserved_regs);
tcg_out_label(s, args[0], s->code_ptr);
break;
case INDEX_op_call:
- dead_args = s->op_dead_args[op_index];
- args += tcg_reg_alloc_call(s, def, opc, args, dead_args);
+ args += tcg_reg_alloc_call(s, def, opc, args,
+ s->op_dead_args[op_index],
+ s->op_sync_args[op_index]);
goto next;
case INDEX_op_end:
goto the_end;
@@ -2154,8 +2304,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
/* Note: in order to speed up the code, it would be much
faster to have specialized register allocator functions for
some common argument patterns */
- dead_args = s->op_dead_args[op_index];
- tcg_reg_alloc_op(s, def, opc, args, dead_args);
+ tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
+ s->op_sync_args[op_index]);
break;
}
args += def->nb_args;
@@ -2169,6 +2319,10 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
#endif
}
the_end:
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+ /* Generate TB finalization at the end of block */
+    /* Generate TB finalization at the end of the block */
+#endif
return -1;
}
@@ -2177,7 +2331,7 @@ int tcg_gen_code(TCGContext *s, uint8_t *gen_code_buf)
#ifdef CONFIG_PROFILER
{
int n;
- n = (gen_opc_ptr - gen_opc_buf);
+ n = (s->gen_opc_ptr - s->gen_opc_buf);
s->op_count += n;
if (n > s->op_count_max)
s->op_count_max = n;
@@ -2241,6 +2395,9 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
(double)s->interm_time / tot * 100.0);
cpu_fprintf(f, " gen_code time %0.1f%%\n",
(double)s->code_time / tot * 100.0);
+ cpu_fprintf(f, "optim./code time %0.1f%%\n",
+ (double)s->opt_time / (s->code_time ? s->code_time : 1)
+ * 100.0);
cpu_fprintf(f, "liveness/code time %0.1f%%\n",
(double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index d710694..9481e35 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -79,6 +79,7 @@ typedef uint64_t TCGRegSet;
#define TCG_TARGET_HAS_nand_i64 0
#define TCG_TARGET_HAS_nor_i64 0
#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_movcond_i64 0
#endif
#ifndef TCG_TARGET_deposit_i32_valid
@@ -187,6 +188,24 @@ typedef tcg_target_ulong TCGArg;
are aliases for target_ulong and host pointer sized values respectively.
*/
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+/* Macros/structures for qemu_ld/st IR code optimization:
+   TCG_MAX_QEMU_LDST is defined to be the same as OPC_BUF_SIZE in exec-all.h. */
+#define TCG_MAX_QEMU_LDST 640
+
+typedef struct TCGLabelQemuLdst {
+ int is_ld:1; /* qemu_ld: 1, qemu_st: 0 */
+ int opc:4;
+ int addrlo_reg; /* reg index for low word of guest virtual addr */
+ int addrhi_reg; /* reg index for high word of guest virtual addr */
+ int datalo_reg; /* reg index for low word to be loaded or stored */
+ int datahi_reg; /* reg index for high word to be loaded or stored */
+ int mem_index; /* soft MMU memory index */
+    uint8_t *raddr;         /* gen code addr of the next IR after the qemu_ld/st */
+ uint8_t *label_ptr[2]; /* label pointers to be updated */
+} TCGLabelQemuLdst;
+#endif
+
#ifdef CONFIG_DEBUG_TCG
#define DEBUG_TCGV 1
#endif
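
One record of this type is queued per qemu_ld/st emitted on the fast path (the qemu_ldst_labels array and nb_qemu_ldst_labels counter added to TCGContext below), and tcg_out_tb_finalize() walks them to emit the TLB-miss slow paths at the end of the TB. A self-contained miniature of that bookkeeping pattern (illustrative only; the Mini* types and new_ldst_label are made-up names):

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define TCG_MAX_QEMU_LDST 640

typedef struct {
    int is_ld;                 /* qemu_ld: 1, qemu_st: 0 */
    int mem_index;             /* soft MMU memory index */
    uint8_t *raddr;            /* where the slow path returns to */
} MiniLdstLabel;

typedef struct {
    MiniLdstLabel qemu_ldst_labels[TCG_MAX_QEMU_LDST];
    int nb_qemu_ldst_labels;
} MiniContext;

/* Reserve the next slow-path record while emitting the fast path. */
static MiniLdstLabel *new_ldst_label(MiniContext *s)
{
    assert(s->nb_qemu_ldst_labels < TCG_MAX_QEMU_LDST);
    return &s->qemu_ldst_labels[s->nb_qemu_ldst_labels++];
}

int main(void)
{
    MiniContext ctx;
    memset(&ctx, 0, sizeof(ctx));

    MiniLdstLabel *l = new_ldst_label(&ctx);
    l->is_ld = 1;
    l->mem_index = 0;
    l->raddr = NULL;                      /* patched with the real address */

    assert(ctx.nb_qemu_ldst_labels == 1); /* walked later, at TB finalization */
    return 0;
}
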
@@ -252,31 +271,47 @@ typedef int TCGv_i64;
#define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1)
/* call flags */
-/* A pure function only reads its arguments and TCG global variables
- and cannot raise exceptions. Hence a call to a pure function can be
- safely suppressed if the return value is not used. */
-#define TCG_CALL_PURE 0x0010
-/* A const function only reads its arguments and does not use TCG
- global variables. Hence a call to such a function does not
- save TCG global variables back to their canonical location. */
-#define TCG_CALL_CONST 0x0020
+/* Helper does not read globals (either directly or through an exception). It
+ implies TCG_CALL_NO_WRITE_GLOBALS. */
+#define TCG_CALL_NO_READ_GLOBALS 0x0010
+/* Helper does not write globals */
+#define TCG_CALL_NO_WRITE_GLOBALS 0x0020
+/* Helper can be safely suppressed if the return value is not used. */
+#define TCG_CALL_NO_SIDE_EFFECTS 0x0040
+
+/* convenience version of most used call flags */
+#define TCG_CALL_NO_RWG TCG_CALL_NO_READ_GLOBALS
+#define TCG_CALL_NO_WG TCG_CALL_NO_WRITE_GLOBALS
+#define TCG_CALL_NO_SE TCG_CALL_NO_SIDE_EFFECTS
+#define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE)
+#define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE)
/* used to align parameters */
#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
+/* Conditions.  Note that these are laid out for easy manipulation by
+   the functions below:
+ bit 0 is used for inverting;
+ bit 1 is signed,
+ bit 2 is unsigned,
+ bit 3 is used with bit 0 for swapping signed/unsigned. */
typedef enum {
- TCG_COND_EQ,
- TCG_COND_NE,
- TCG_COND_LT,
- TCG_COND_GE,
- TCG_COND_LE,
- TCG_COND_GT,
+ /* non-signed */
+ TCG_COND_NEVER = 0 | 0 | 0 | 0,
+ TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+ TCG_COND_EQ = 8 | 0 | 0 | 0,
+ TCG_COND_NE = 8 | 0 | 0 | 1,
+ /* signed */
+ TCG_COND_LT = 0 | 0 | 2 | 0,
+ TCG_COND_GE = 0 | 0 | 2 | 1,
+ TCG_COND_LE = 8 | 0 | 2 | 0,
+ TCG_COND_GT = 8 | 0 | 2 | 1,
/* unsigned */
- TCG_COND_LTU,
- TCG_COND_GEU,
- TCG_COND_LEU,
- TCG_COND_GTU,
+ TCG_COND_LTU = 0 | 4 | 0 | 0,
+ TCG_COND_GEU = 0 | 4 | 0 | 1,
+ TCG_COND_LEU = 8 | 4 | 0 | 0,
+ TCG_COND_GTU = 8 | 4 | 0 | 1,
} TCGCond;
/* Invert the sense of the comparison. */
@@ -288,13 +323,34 @@ static inline TCGCond tcg_invert_cond(TCGCond c)
/* Swap the operands in a comparison. */
static inline TCGCond tcg_swap_cond(TCGCond c)
{
- int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15);
- return (TCGCond)(c ^ mask);
+ return c & 6 ? (TCGCond)(c ^ 9) : c;
}
+/* Create an "unsigned" version of a "signed" comparison. */
static inline TCGCond tcg_unsigned_cond(TCGCond c)
{
- return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
+ return c & 2 ? (TCGCond)(c ^ 6) : c;
+}
+
+/* Must a comparison be considered unsigned? */
+static inline bool is_unsigned_cond(TCGCond c)
+{
+ return (c & 4) != 0;
+}
+
+/* Create a "high" version of a double-word comparison.
+   This removes equality from an LE or GE comparison (signed or unsigned). */
+static inline TCGCond tcg_high_cond(TCGCond c)
+{
+ switch (c) {
+ case TCG_COND_GE:
+ case TCG_COND_LE:
+ case TCG_COND_GEU:
+ case TCG_COND_LEU:
+ return (TCGCond)(c ^ 8);
+ default:
+ return c;
+ }
}
#define TEMP_VAL_DEAD 0
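
The encoding and the helpers above can be sanity-checked in isolation; the sketch below copies the enum values and helper bodies verbatim and asserts a few identities (standalone, not QEMU code):

#include <assert.h>
#include <stdbool.h>

typedef enum {
    TCG_COND_NEVER = 0, TCG_COND_ALWAYS = 1,
    TCG_COND_EQ  = 8,  TCG_COND_NE  = 9,
    TCG_COND_LT  = 2,  TCG_COND_GE  = 3,  TCG_COND_LE  = 10, TCG_COND_GT  = 11,
    TCG_COND_LTU = 4,  TCG_COND_GEU = 5,  TCG_COND_LEU = 12, TCG_COND_GTU = 13,
} TCGCond;

static TCGCond tcg_swap_cond(TCGCond c)     { return c & 6 ? (TCGCond)(c ^ 9) : c; }
static TCGCond tcg_unsigned_cond(TCGCond c) { return c & 2 ? (TCGCond)(c ^ 6) : c; }
static bool is_unsigned_cond(TCGCond c)     { return (c & 4) != 0; }

static TCGCond tcg_high_cond(TCGCond c)
{
    switch (c) {
    case TCG_COND_GE: case TCG_COND_LE: case TCG_COND_GEU: case TCG_COND_LEU:
        return (TCGCond)(c ^ 8);
    default:
        return c;
    }
}

int main(void)
{
    assert(tcg_swap_cond(TCG_COND_LT) == TCG_COND_GT);     /* a < b   <=>  b > a   */
    assert(tcg_swap_cond(TCG_COND_LEU) == TCG_COND_GEU);   /* a <= b  <=>  b >= a  */
    assert(tcg_swap_cond(TCG_COND_EQ) == TCG_COND_EQ);     /* symmetric, unchanged */
    assert(tcg_unsigned_cond(TCG_COND_LT) == TCG_COND_LTU);
    assert(is_unsigned_cond(TCG_COND_GTU) && !is_unsigned_cond(TCG_COND_GT));
    assert(tcg_high_cond(TCG_COND_LEU) == TCG_COND_LTU);   /* equality removed */
    return 0;
}
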
@@ -335,7 +391,6 @@ struct TCGContext {
TCGPool *pool_first, *pool_current, *pool_first_large;
TCGLabel *labels;
int nb_labels;
- TCGTemp *temps; /* globals first, temps after */
int nb_globals;
int nb_temps;
/* index of free temps, -1 if none */
@@ -343,13 +398,16 @@ struct TCGContext {
/* goto_tb support */
uint8_t *code_buf;
- unsigned long *tb_next;
+ uintptr_t *tb_next;
uint16_t *tb_next_offset;
uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
/* liveness analysis */
uint16_t *op_dead_args; /* for each operation, each bit tells if the
corresponding argument is dead */
+ uint8_t *op_sync_args; /* for each operation, each bit tells if the
+ corresponding output argument needs to be
+                                synced to memory. */
/* tells in which temporary a given register is. It does not take
into account fixed registers */
@@ -361,7 +419,7 @@ struct TCGContext {
int frame_reg;
uint8_t *code_ptr;
- TCGTemp static_temps[TCG_MAX_TEMPS];
+ TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
TCGHelperInfo *helpers;
int nb_helpers;
@@ -382,20 +440,31 @@ struct TCGContext {
int64_t interm_time;
int64_t code_time;
int64_t la_time;
+ int64_t opt_time;
int64_t restore_count;
int64_t restore_time;
#endif
#ifdef CONFIG_DEBUG_TCG
int temps_in_use;
+ int goto_tb_issue_mask;
+#endif
+
+ uint16_t gen_opc_buf[OPC_BUF_SIZE];
+ TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+
+ uint16_t *gen_opc_ptr;
+ TCGArg *gen_opparam_ptr;
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+    /* labels info for qemu_ld/st IRs.
+       The labels help to generate TLB-miss handling code at the end of the TB */
+ TCGLabelQemuLdst *qemu_ldst_labels;
+ int nb_qemu_ldst_labels;
#endif
};
extern TCGContext tcg_ctx;
-extern uint16_t *gen_opc_ptr;
-extern TCGArg *gen_opparam_ptr;
-extern uint16_t gen_opc_buf[];
-extern TCGArg gen_opparam_buf[];
/* pool based memory allocation */
@@ -458,11 +527,6 @@ static inline TCGv_i64 tcg_temp_local_new_i64(void)
void tcg_temp_free_i64(TCGv_i64 arg);
char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
-static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
-{
- return s->temps[arg].temp_local;
-}
-
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
* code which is not supposed to leak any TCG temporaries, then
@@ -499,8 +563,8 @@ enum {
TCG_OPF_BB_END = 0x01,
/* Instruction clobbers call registers and potentially update globals. */
TCG_OPF_CALL_CLOBBER = 0x02,
- /* Instruction has side effects: it cannot be removed
- if its outputs are not used. */
+ /* Instruction has side effects: it cannot be removed if its outputs
+ are not used, and might trigger exceptions. */
TCG_OPF_SIDE_EFFECTS = 0x04,
/* Instruction operands are 64-bits (otherwise 32-bits). */
TCG_OPF_64BIT = 0x08,
@@ -533,6 +597,15 @@ do {\
abort();\
} while (0)
+#ifdef CONFIG_DEBUG_TCG
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#elif QEMU_GNUC_PREREQ(4, 5)
+# define tcg_debug_assert(X) \
+ do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#else
+# define tcg_debug_assert(X) do { (void)(X); } while (0)
+#endif
+
void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
#if TCG_TARGET_REG_BITS == 32
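
A possible use of tcg_debug_assert, showing the intent of the release-mode branch: the condition is still stated, so an optimizing compiler may assume it holds, while debug builds actually check it. (Sketch only; get_arg is a made-up function and the compiler-version test is simplified to a plain __GNUC__ check.)

#include <assert.h>

#if defined(CONFIG_DEBUG_TCG) || !defined(__GNUC__)
# define tcg_debug_assert(X) do { assert(X); } while (0)
#else
# define tcg_debug_assert(X) \
    do { if (!(X)) { __builtin_unreachable(); } } while (0)
#endif

static int get_arg(const int *args, int i)
{
    tcg_debug_assert(i >= 0 && i < 4);   /* checked only in debug builds */
    return args[i];
}

int main(void)
{
    int args[4] = { 7, 8, 9, 10 };
    return get_arg(args, 2) == 9 ? 0 : 1;
}
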
@@ -579,7 +652,7 @@ TCGv_i64 tcg_const_i64(int64_t val);
TCGv_i32 tcg_const_local_i32(int32_t val);
TCGv_i64 tcg_const_local_i64(int64_t val);
-extern uint8_t code_gen_prologue[];
+extern uint8_t *code_gen_prologue;
/* TCG targets may use a different definition of tcg_qemu_tb_exec. */
#if !defined(tcg_qemu_tb_exec)
@@ -588,3 +661,8 @@ extern uint8_t code_gen_prologue[];
#endif
void tcg_register_jit(void *buf, size_t buf_size);
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+/* Generate TB finalization at the end of the block */
+void tcg_out_tb_finalize(TCGContext *s);
+#endif
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index ef8580f..1707169 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -69,7 +69,6 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
{ INDEX_op_exit_tb, { NULL } },
{ INDEX_op_goto_tb, { NULL } },
{ INDEX_op_call, { RI } },
- { INDEX_op_jmp, { RI } },
{ INDEX_op_br, { NULL } },
{ INDEX_op_mov_i32, { R, R } },
@@ -123,6 +122,9 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
{ INDEX_op_rotl_i32, { R, RI, RI } },
{ INDEX_op_rotr_i32, { R, RI, RI } },
#endif
+#if TCG_TARGET_HAS_deposit_i32
+ { INDEX_op_deposit_i32, { R, "0", R } },
+#endif
{ INDEX_op_brcond_i32, { R, RI } },
@@ -201,6 +203,9 @@ static const TCGTargetOpDef tcg_target_op_defs[] = {
{ INDEX_op_rotl_i64, { R, RI, RI } },
{ INDEX_op_rotr_i64, { R, RI, RI } },
#endif
+#if TCG_TARGET_HAS_deposit_i64
+ { INDEX_op_deposit_i64, { R, "0", R } },
+#endif
{ INDEX_op_brcond_i64, { R, RI } },
#if TCG_TARGET_HAS_ext8s_i64
@@ -300,7 +305,7 @@ static const int tcg_target_reg_alloc_order[] = {
#endif
};
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
# error Fix needed, number of supported input arguments changed!
#endif
@@ -309,16 +314,18 @@ static const int tcg_target_call_iarg_regs[] = {
TCG_REG_R1,
TCG_REG_R2,
TCG_REG_R3,
-#if TCG_TARGET_REG_BITS == 32
- /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
#if 0 /* used for TCG_REG_CALL_STACK */
TCG_REG_R4,
#endif
TCG_REG_R5,
+#if TCG_TARGET_REG_BITS == 32
+ /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
TCG_REG_R6,
TCG_REG_R7,
#if TCG_TARGET_NB_REGS >= 16
TCG_REG_R8,
+ TCG_REG_R9,
+ TCG_REG_R10,
#else
# error Too few input registers available
#endif
@@ -581,9 +588,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_call:
tcg_out_ri(s, const_args[0], args[0]);
break;
- case INDEX_op_jmp:
- TODO();
- break;
case INDEX_op_setcond_i32:
tcg_out_r(s, args[0]);
tcg_out_r(s, args[1]);
@@ -655,6 +659,15 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_ri32(s, const_args[1], args[1]);
tcg_out_ri32(s, const_args[2], args[2]);
break;
+ case INDEX_op_deposit_i32: /* Optional (TCG_TARGET_HAS_deposit_i32). */
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ assert(args[3] <= UINT8_MAX);
+ tcg_out8(s, args[3]);
+ assert(args[4] <= UINT8_MAX);
+ tcg_out8(s, args[4]);
+ break;
#if TCG_TARGET_REG_BITS == 64
case INDEX_op_mov_i64:
@@ -682,6 +695,15 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out_ri64(s, const_args[1], args[1]);
tcg_out_ri64(s, const_args[2], args[2]);
break;
+ case INDEX_op_deposit_i64: /* Optional (TCG_TARGET_HAS_deposit_i64). */
+ tcg_out_r(s, args[0]);
+ tcg_out_r(s, args[1]);
+ tcg_out_r(s, args[2]);
+ assert(args[3] <= UINT8_MAX);
+ tcg_out8(s, args[3]);
+ assert(args[4] <= UINT8_MAX);
+ tcg_out8(s, args[4]);
+ break;
case INDEX_op_div_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
case INDEX_op_divu_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
case INDEX_op_rem_i64: /* Optional (TCG_TARGET_HAS_div_i64). */
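
For reference, the operation those five operands describe: deposit replaces a len-bit field of the first source, starting at bit pos, with the low bits of the second source; the 64-bit case is analogous. A standalone check of that arithmetic (not tied to the interpreter; deposit32 is a made-up helper):

#include <assert.h>
#include <stdint.h>

/* dest = t1 with the low 'len' bits of t2 inserted at bit 'pos'. */
static uint32_t deposit32(uint32_t t1, uint32_t t2, unsigned pos, unsigned len)
{
    uint32_t mask = (len >= 32 ? 0xffffffffu : ((1u << len) - 1)) << pos;
    return (t1 & ~mask) | ((t2 << pos) & mask);
}

int main(void)
{
    /* Insert the byte 0xAB at bit 8: 0x11223344 -> 0x1122AB44. */
    assert(deposit32(0x11223344u, 0xABu, 8, 8) == 0x1122AB44u);
    return 0;
}
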
@@ -798,9 +820,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
case INDEX_op_qemu_st8:
case INDEX_op_qemu_st16:
case INDEX_op_qemu_st32:
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_r(s, TCG_AREG0);
-#endif
tcg_out_r(s, *args++);
tcg_out_r(s, *args++);
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -811,9 +830,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
#endif
break;
case INDEX_op_qemu_st64:
-#ifdef CONFIG_TCG_PASS_AREG0
- tcg_out_r(s, TCG_AREG0);
-#endif
tcg_out_r(s, *args++);
#if TCG_TARGET_REG_BITS == 32
tcg_out_r(s, *args++);
@@ -867,12 +883,6 @@ static int tcg_target_const_match(tcg_target_long val,
return arg_ct->ct & TCG_CT_CONST;
}
-/* Maximum number of register used for input function arguments. */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
- return ARRAY_SIZE(tcg_target_call_iarg_regs);
-}
-
static void tcg_target_init(TCGContext *s)
{
#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 30a0f21..a832f5c 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -67,7 +67,7 @@
#define TCG_TARGET_HAS_ext8u_i32 1
#define TCG_TARGET_HAS_ext16u_i32 1
#define TCG_TARGET_HAS_andc_i32 0
-#define TCG_TARGET_HAS_deposit_i32 0
+#define TCG_TARGET_HAS_deposit_i32 1
#define TCG_TARGET_HAS_eqv_i32 0
#define TCG_TARGET_HAS_nand_i32 0
#define TCG_TARGET_HAS_nor_i32 0
@@ -75,12 +75,13 @@
#define TCG_TARGET_HAS_not_i32 1
#define TCG_TARGET_HAS_orc_i32 0
#define TCG_TARGET_HAS_rot_i32 1
+#define TCG_TARGET_HAS_movcond_i32 0
#if TCG_TARGET_REG_BITS == 64
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_deposit_i64 0
+#define TCG_TARGET_HAS_deposit_i64 1
/* Not more than one of the next two defines must be 1. */
#define TCG_TARGET_HAS_div_i64 0
#define TCG_TARGET_HAS_div2_i64 0
@@ -98,11 +99,9 @@
#define TCG_TARGET_HAS_not_i64 1
#define TCG_TARGET_HAS_orc_i64 0
#define TCG_TARGET_HAS_rot_i64 1
+#define TCG_TARGET_HAS_movcond_i64 0
#endif /* TCG_TARGET_REG_BITS == 64 */
-/* Offset to user memory in user mode. */
-#define TCG_TARGET_HAS_GUEST_BASE
-
/* Number of registers available.
For 32 bit hosts, we need more than 8 registers (call arguments). */
/* #define TCG_TARGET_NB_REGS 8 */