Diffstat (limited to 'target')
248 files changed, 15238 insertions(+), 14596 deletions(-)
diff --git a/target/Kconfig b/target/Kconfig index 5275a93ad0..7f64112e9e 100644 --- a/target/Kconfig +++ b/target/Kconfig @@ -17,3 +17,6 @@ source sh4/Kconfig source sparc/Kconfig source tricore/Kconfig source xtensa/Kconfig + +config TARGET_BIG_ENDIAN + bool diff --git a/target/alpha/cpu-param.h b/target/alpha/cpu-param.h index c969cb016b..5ce213a9a1 100644 --- a/target/alpha/cpu-param.h +++ b/target/alpha/cpu-param.h @@ -27,4 +27,7 @@ # define TARGET_VIRT_ADDR_SPACE_BITS (30 + TARGET_PAGE_BITS) #endif +/* Alpha processors have a weak memory model */ +#define TCG_GUEST_DEFAULT_MO (0) + #endif diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 05f9ee41e9..0e2fbcb397 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -28,25 +28,37 @@ static void alpha_cpu_set_pc(CPUState *cs, vaddr value) { - AlphaCPU *cpu = ALPHA_CPU(cs); - - cpu->env.pc = value; + CPUAlphaState *env = cpu_env(cs); + env->pc = value; } static vaddr alpha_cpu_get_pc(CPUState *cs) { - AlphaCPU *cpu = ALPHA_CPU(cs); + CPUAlphaState *env = cpu_env(cs); + return env->pc; +} - return cpu->env.pc; +static void alpha_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + /* The program counter is always up to date with CF_PCREL. */ + if (!(tb_cflags(tb) & CF_PCREL)) { + CPUAlphaState *env = cpu_env(cs); + env->pc = tb->pc; + } } static void alpha_restore_state_to_opc(CPUState *cs, const TranslationBlock *tb, const uint64_t *data) { - AlphaCPU *cpu = ALPHA_CPU(cs); + CPUAlphaState *env = cpu_env(cs); - cpu->env.pc = data[0]; + if (tb_cflags(tb) & CF_PCREL) { + env->pc = (env->pc & TARGET_PAGE_MASK) | data[0]; + } else { + env->pc = data[0]; + } } static bool alpha_cpu_has_work(CPUState *cs) @@ -81,6 +93,11 @@ static void alpha_cpu_realizefn(DeviceState *dev, Error **errp) AlphaCPUClass *acc = ALPHA_CPU_GET_CLASS(dev); Error *local_err = NULL; +#ifndef CONFIG_USER_ONLY + /* Use pc-relative instructions in system-mode */ + cs->tcg_cflags |= CF_PCREL; +#endif + cpu_exec_realizefn(cs, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); @@ -193,6 +210,7 @@ static const struct SysemuCPUOps alpha_sysemu_ops = { static const TCGCPUOps alpha_tcg_ops = { .initialize = alpha_translate_init, + .synchronize_from_tb = alpha_cpu_synchronize_from_tb, .restore_state_to_opc = alpha_restore_state_to_opc, #ifdef CONFIG_USER_ONLY diff --git a/target/alpha/cpu.h b/target/alpha/cpu.h index 7188a409a0..f9e2ecb90a 100644 --- a/target/alpha/cpu.h +++ b/target/alpha/cpu.h @@ -24,9 +24,6 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" -/* Alpha processors have a weak memory model */ -#define TCG_GUEST_DEFAULT_MO (0) - #define ICACHE_LINE_SIZE 32 #define DCACHE_LINE_SIZE 32 diff --git a/target/alpha/helper.c b/target/alpha/helper.c index d6d4353edd..2f1000c99f 100644 --- a/target/alpha/helper.c +++ b/target/alpha/helper.c @@ -21,6 +21,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "fpu/softfloat-types.h" #include "exec/helper-proto.h" #include "qemu/qemu-print.h" @@ -124,7 +125,7 @@ void alpha_cpu_record_sigsegv(CPUState *cs, vaddr address, MMUAccessType access_type, bool maperr, uintptr_t retaddr) { - AlphaCPU *cpu = ALPHA_CPU(cs); + CPUAlphaState *env = cpu_env(cs); target_ulong mmcsr, cause; /* Assuming !maperr, infer the missing protection. */ @@ -155,9 +156,9 @@ void alpha_cpu_record_sigsegv(CPUState *cs, vaddr address, } /* Record the arguments that PALcode would give to the kernel. 
*/ - cpu->env.trap_arg0 = address; - cpu->env.trap_arg1 = mmcsr; - cpu->env.trap_arg2 = cause; + env->trap_arg0 = address; + env->trap_arg1 = mmcsr; + env->trap_arg2 = cause; } #else /* Returns the OSF/1 entMM failure indication, or -1 on success. */ diff --git a/target/alpha/translate.c b/target/alpha/translate.c index a97cd54f0c..fb6cac4b53 100644 --- a/target/alpha/translate.c +++ b/target/alpha/translate.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "sysemu/cpus.h" -#include "disas/disas.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" @@ -54,6 +53,9 @@ struct DisasContext { uint32_t tbflags; int mem_idx; + /* True if generating pc-relative code. */ + bool pcrel; + /* implver and amask values for this CPU. */ int implver; int amask; @@ -252,6 +254,16 @@ static void st_flag_byte(TCGv val, unsigned shift) tcg_gen_st8_i64(val, tcg_env, get_flag_ofs(shift)); } +static void gen_pc_disp(DisasContext *ctx, TCGv dest, int32_t disp) +{ + uint64_t addr = ctx->base.pc_next + disp; + if (ctx->pcrel) { + tcg_gen_addi_i64(dest, cpu_pc, addr - ctx->base.pc_first); + } else { + tcg_gen_movi_i64(dest, addr); + } +} + static void gen_excp_1(int exception, int error_code) { TCGv_i32 tmp1, tmp2; @@ -263,7 +275,7 @@ static void gen_excp_1(int exception, int error_code) static DisasJumpType gen_excp(DisasContext *ctx, int exception, int error_code) { - tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next); + gen_pc_disp(ctx, cpu_pc, 0); gen_excp_1(exception, error_code); return DISAS_NORETURN; } @@ -425,60 +437,49 @@ static DisasJumpType gen_store_conditional(DisasContext *ctx, int ra, int rb, return DISAS_NEXT; } -static bool use_goto_tb(DisasContext *ctx, uint64_t dest) +static void gen_goto_tb(DisasContext *ctx, int idx, int32_t disp) { - return translator_use_goto_tb(&ctx->base, dest); + if (translator_use_goto_tb(&ctx->base, ctx->base.pc_next + disp)) { + /* With PCREL, PC must always be up-to-date. */ + if (ctx->pcrel) { + gen_pc_disp(ctx, cpu_pc, disp); + tcg_gen_goto_tb(idx); + } else { + tcg_gen_goto_tb(idx); + gen_pc_disp(ctx, cpu_pc, disp); + } + tcg_gen_exit_tb(ctx->base.tb, idx); + } else { + gen_pc_disp(ctx, cpu_pc, disp); + tcg_gen_lookup_and_goto_ptr(); + } } static DisasJumpType gen_bdirect(DisasContext *ctx, int ra, int32_t disp) { - uint64_t dest = ctx->base.pc_next + (disp << 2); - if (ra != 31) { - tcg_gen_movi_i64(ctx->ir[ra], ctx->base.pc_next); + gen_pc_disp(ctx, ctx->ir[ra], 0); } /* Notice branch-to-next; used to initialize RA with the PC. 
*/ if (disp == 0) { - return 0; - } else if (use_goto_tb(ctx, dest)) { - tcg_gen_goto_tb(0); - tcg_gen_movi_i64(cpu_pc, dest); - tcg_gen_exit_tb(ctx->base.tb, 0); - return DISAS_NORETURN; - } else { - tcg_gen_movi_i64(cpu_pc, dest); - return DISAS_PC_UPDATED; + return DISAS_NEXT; } + gen_goto_tb(ctx, 0, disp); + return DISAS_NORETURN; } static DisasJumpType gen_bcond_internal(DisasContext *ctx, TCGCond cond, TCGv cmp, uint64_t imm, int32_t disp) { - uint64_t dest = ctx->base.pc_next + (disp << 2); TCGLabel *lab_true = gen_new_label(); - if (use_goto_tb(ctx, dest)) { - tcg_gen_brcondi_i64(cond, cmp, imm, lab_true); - - tcg_gen_goto_tb(0); - tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next); - tcg_gen_exit_tb(ctx->base.tb, 0); + tcg_gen_brcondi_i64(cond, cmp, imm, lab_true); + gen_goto_tb(ctx, 0, 0); + gen_set_label(lab_true); + gen_goto_tb(ctx, 1, disp); - gen_set_label(lab_true); - tcg_gen_goto_tb(1); - tcg_gen_movi_i64(cpu_pc, dest); - tcg_gen_exit_tb(ctx->base.tb, 1); - - return DISAS_NORETURN; - } else { - TCGv_i64 i = tcg_constant_i64(imm); - TCGv_i64 d = tcg_constant_i64(dest); - TCGv_i64 p = tcg_constant_i64(ctx->base.pc_next); - - tcg_gen_movcond_i64(cond, cpu_pc, cmp, i, d, p); - return DISAS_PC_UPDATED; - } + return DISAS_NORETURN; } static DisasJumpType gen_bcond(DisasContext *ctx, TCGCond cond, int ra, @@ -1106,7 +1107,7 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) } /* Allow interrupts to be recognized right away. */ - tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next); + gen_pc_disp(ctx, cpu_pc, 0); return DISAS_PC_UPDATED_NOCHAIN; case 0x36: @@ -1153,19 +1154,17 @@ static DisasJumpType gen_call_pal(DisasContext *ctx, int palcode) #else { TCGv tmp = tcg_temp_new(); - uint64_t exc_addr = ctx->base.pc_next; - uint64_t entry = ctx->palbr; + uint64_t entry; + gen_pc_disp(ctx, tmp, 0); if (ctx->tbflags & ENV_FLAG_PAL_MODE) { - exc_addr |= 1; + tcg_gen_ori_i64(tmp, tmp, 1); } else { - tcg_gen_movi_i64(tmp, 1); - st_flag_byte(tmp, ENV_FLAG_PAL_SHIFT); + st_flag_byte(tcg_constant_i64(1), ENV_FLAG_PAL_SHIFT); } - - tcg_gen_movi_i64(tmp, exc_addr); tcg_gen_st_i64(tmp, tcg_env, offsetof(CPUAlphaState, exc_addr)); + entry = ctx->palbr; entry += (palcode & 0x80 ? 0x2000 + (palcode - 0x80) * 64 : 0x1000 + palcode * 64); @@ -1382,7 +1381,7 @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn) real_islit = islit = extract32(insn, 12, 1); lit = extract32(insn, 13, 8); - disp21 = sextract32(insn, 0, 21); + disp21 = sextract32(insn, 0, 21) * 4; disp16 = sextract32(insn, 0, 16); disp12 = sextract32(insn, 0, 12); @@ -2359,9 +2358,13 @@ static DisasJumpType translate_one(DisasContext *ctx, uint32_t insn) /* JMP, JSR, RET, JSR_COROUTINE. These only differ by the branch prediction stack action, which of course we don't implement. 
*/ vb = load_gpr(ctx, rb); - tcg_gen_andi_i64(cpu_pc, vb, ~3); if (ra != 31) { - tcg_gen_movi_i64(ctx->ir[ra], ctx->base.pc_next); + tmp = tcg_temp_new(); + tcg_gen_andi_i64(tmp, vb, ~3); + gen_pc_disp(ctx, ctx->ir[ra], 0); + tcg_gen_mov_i64(cpu_pc, tmp); + } else { + tcg_gen_andi_i64(cpu_pc, vb, ~3); } ret = DISAS_PC_UPDATED; break; @@ -2862,6 +2865,7 @@ static void alpha_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) ctx->tbflags = ctx->base.tb->flags; ctx->mem_idx = alpha_env_mmu_index(env); + ctx->pcrel = ctx->base.tb->cflags & CF_PCREL; ctx->implver = env->implver; ctx->amask = env->amask; @@ -2897,7 +2901,13 @@ static void alpha_tr_tb_start(DisasContextBase *db, CPUState *cpu) static void alpha_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { - tcg_gen_insn_start(dcbase->pc_next); + DisasContext *ctx = container_of(dcbase, DisasContext, base); + + if (ctx->pcrel) { + tcg_gen_insn_start(dcbase->pc_next & ~TARGET_PAGE_MASK); + } else { + tcg_gen_insn_start(dcbase->pc_next); + } } static void alpha_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) @@ -2920,14 +2930,10 @@ static void alpha_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) case DISAS_NORETURN: break; case DISAS_TOO_MANY: - if (use_goto_tb(ctx, ctx->base.pc_next)) { - tcg_gen_goto_tb(0); - tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next); - tcg_gen_exit_tb(ctx->base.tb, 0); - } - /* FALLTHRU */ + gen_goto_tb(ctx, 0, 0); + break; case DISAS_PC_STALE: - tcg_gen_movi_i64(cpu_pc, ctx->base.pc_next); + gen_pc_disp(ctx, cpu_pc, 0); /* FALLTHRU */ case DISAS_PC_UPDATED: tcg_gen_lookup_and_goto_ptr(); @@ -2940,20 +2946,12 @@ static void alpha_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void alpha_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps alpha_tr_ops = { .init_disas_context = alpha_tr_init_disas_context, .tb_start = alpha_tr_tb_start, .insn_start = alpha_tr_insn_start, .translate_insn = alpha_tr_translate_insn, .tb_stop = alpha_tr_tb_stop, - .disas_log = alpha_tr_disas_log, }; void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, diff --git a/target/arm/Kconfig b/target/arm/Kconfig index bf57d739cd..7f8a2217ae 100644 --- a/target/arm/Kconfig +++ b/target/arm/Kconfig @@ -6,6 +6,10 @@ config ARM # translate.c v7m helpers under ARM_V7M. 
select ARM_V7M if TCG + select DEVICE_TREE # needed by boot.c + config AARCH64 bool select ARM + # kvm_arch_fixup_msi_route() needs to access PCIDevice + select PCI if KVM diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index b300d0446d..c59ca104fe 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -571,6 +571,11 @@ static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; } +static inline bool isar_feature_aa64_wfxt(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, WFXT) >= 2; +} + static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; diff --git a/target/arm/cpu-param.h b/target/arm/cpu-param.h index da3243ab21..2d5f3aa312 100644 --- a/target/arm/cpu-param.h +++ b/target/arm/cpu-param.h @@ -27,14 +27,16 @@ # else # define TARGET_PAGE_BITS 12 # endif -#else +#else /* !CONFIG_USER_ONLY */ /* * ARMv7 and later CPUs have 4K pages minimum, but ARMv5 and v6 * have to support 1K tiny pages. */ # define TARGET_PAGE_BITS_VARY # define TARGET_PAGE_BITS_MIN 10 +#endif /* !CONFIG_USER_ONLY */ -#endif +/* ARM processors have a weak memory model */ +#define TCG_GUEST_DEFAULT_MO (0) #endif diff --git a/target/arm/cpu.c b/target/arm/cpu.c index a152def241..35fa281f1b 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -1132,6 +1132,35 @@ static bool arm_cpu_virtio_is_big_endian(CPUState *cs) return arm_cpu_data_is_big_endian(env); } +#ifdef CONFIG_TCG +static bool arm_cpu_exec_halt(CPUState *cs) +{ + bool leave_halt = cpu_has_work(cs); + + if (leave_halt) { + /* We're about to come out of WFI/WFE: disable the WFxT timer */ + ARMCPU *cpu = ARM_CPU(cs); + if (cpu->wfxt_timer) { + timer_del(cpu->wfxt_timer); + } + } + return leave_halt; +} +#endif + +static void arm_wfxt_timer_cb(void *opaque) +{ + ARMCPU *cpu = opaque; + CPUState *cs = CPU(cpu); + + /* + * We expect the CPU to be halted; this will cause arm_cpu_is_work() + * to return true (so we will come out of halt even with no other + * pending interrupt), and the TCG accelerator's cpu_exec_interrupt() + * function auto-clears the CPU_INTERRUPT_EXITTB flag for us. + */ + cpu_interrupt(cs, CPU_INTERRUPT_EXITTB); +} #endif static void arm_disas_set_info(CPUState *cpu, disassemble_info *info) @@ -1506,9 +1535,12 @@ static void arm_cpu_initfn(Object *obj) } } +/* + * 0 means "unset, use the default value". That default might vary depending + * on the CPU type, and is set in the realize fn. 
+ */ static Property arm_cpu_gt_cntfrq_property = - DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, - NANOSECONDS_PER_SECOND / GTIMER_SCALE); + DEFINE_PROP_UINT64("cntfrq", ARMCPU, gt_cntfrq_hz, 0); static Property arm_cpu_reset_cbar_property = DEFINE_PROP_UINT64("reset-cbar", ARMCPU, reset_cbar, 0); @@ -1874,6 +1906,9 @@ static void arm_cpu_finalizefn(Object *obj) if (cpu->pmu_timer) { timer_free(cpu->pmu_timer); } + if (cpu->wfxt_timer) { + timer_free(cpu->wfxt_timer); + } #endif } @@ -1938,7 +1973,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) /* Use pc-relative instructions in system-mode */ - cs->tcg_cflags |= CF_PCREL; + tcg_cflags_set(cs, CF_PCREL); #endif /* If we needed to query the host kernel for the CPU features @@ -1954,6 +1989,26 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) return; } + if (!cpu->gt_cntfrq_hz) { + /* + * 0 means "the board didn't set a value, use the default". (We also + * get here for the CONFIG_USER_ONLY case.) + * ARMv8.6 and later CPUs architecturally must use a 1GHz timer; before + * that it was an IMPDEF choice, and QEMU initially picked 62.5MHz, + * which gives a 16ns tick period. + * + * We will use the back-compat value: + * - for QEMU CPU types added before we standardized on 1GHz + * - for versioned machine types with a version of 9.0 or earlier + */ + if (arm_feature(env, ARM_FEATURE_BACKCOMPAT_CNTFRQ) || + cpu->backcompat_cntfrq) { + cpu->gt_cntfrq_hz = GTIMER_BACKCOMPAT_HZ; + } else { + cpu->gt_cntfrq_hz = GTIMER_DEFAULT_HZ; + } + } + #ifndef CONFIG_USER_ONLY /* The NVIC and M-profile CPU are two halves of a single piece of * hardware; trying to use one without the other is a command line @@ -2002,18 +2057,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) } { - uint64_t scale; - - if (arm_feature(env, ARM_FEATURE_GENERIC_TIMER)) { - if (!cpu->gt_cntfrq_hz) { - error_setg(errp, "Invalid CNTFRQ: %"PRId64"Hz", - cpu->gt_cntfrq_hz); - return; - } - scale = gt_cntfrq_period_ns(cpu); - } else { - scale = GTIMER_SCALE; - } + uint64_t scale = gt_cntfrq_period_ns(cpu); cpu->gt_timer[GTIMER_PHYS] = timer_new(QEMU_CLOCK_VIRTUAL, scale, arm_gt_ptimer_cb, cpu); @@ -2357,6 +2401,13 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp) #endif } +#ifndef CONFIG_USER_ONLY + if (tcg_enabled() && cpu_isar_feature(aa64_wfxt, cpu)) { + cpu->wfxt_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + arm_wfxt_timer_cb, cpu); + } +#endif + if (tcg_enabled()) { /* * Don't report some architectural features in the ID registers @@ -2571,6 +2622,8 @@ static Property arm_cpu_properties[] = { mp_affinity, ARM64_AFFINITY_INVALID), DEFINE_PROP_INT32("node-id", ARMCPU, node_id, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_INT32("core-count", ARMCPU, core_count, -1), + /* True to default to the backward-compat old CNTFRQ rather than 1Ghz */ + DEFINE_PROP_BOOL("backcompat-cntfrq", ARMCPU, backcompat_cntfrq, false), DEFINE_PROP_END_OF_LIST() }; @@ -2611,6 +2664,7 @@ static const TCGCPUOps arm_tcg_ops = { #else .tlb_fill = arm_cpu_tlb_fill, .cpu_exec_interrupt = arm_cpu_exec_interrupt, + .cpu_exec_halt = arm_cpu_exec_halt, .do_interrupt = arm_cpu_do_interrupt, .do_transaction_failed = arm_cpu_do_transaction_failed, .do_unaligned_access = arm_cpu_do_unaligned_access, diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 97997dbd08..3841359d0f 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -26,13 +26,11 @@ #include "cpu-qom.h" #include "exec/cpu-defs.h" #include 
"exec/gdbstub.h" +#include "exec/page-protection.h" #include "qapi/qapi-types-common.h" #include "target/arm/multiprocessing.h" #include "target/arm/gtimer.h" -/* ARM processors have a weak memory model */ -#define TCG_GUEST_DEFAULT_MO (0) - #ifdef TARGET_AARCH64 #define KVM_HAVE_MCE_INJECTION 1 #endif @@ -868,6 +866,9 @@ struct ArchCPU { * pmu_op_finish() - it does not need other handling during migration */ QEMUTimer *pmu_timer; + /* Timer used for WFxT timeouts */ + QEMUTimer *wfxt_timer; + /* GPIO outputs for generic timer */ qemu_irq gt_timer_outputs[NUM_GTIMERS]; /* GPIO output for GICv3 maintenance interrupt signal */ @@ -959,6 +960,9 @@ struct ArchCPU { */ bool host_cpu_probe_failed; + /* QOM property to indicate we should use the back-compat CNTFRQ default */ + bool backcompat_cntfrq; + /* Specify the number of cores in this CPU cluster. Used for the L2CTLR * register. */ @@ -1014,6 +1018,7 @@ struct ArchCPU { uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint64_t id_aa64mmfr2; + uint64_t id_aa64mmfr3; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64zfr0; @@ -2209,6 +2214,22 @@ FIELD(ID_AA64MMFR2, BBM, 52, 4) FIELD(ID_AA64MMFR2, EVT, 56, 4) FIELD(ID_AA64MMFR2, E0PD, 60, 4) +FIELD(ID_AA64MMFR3, TCRX, 0, 4) +FIELD(ID_AA64MMFR3, SCTLRX, 4, 4) +FIELD(ID_AA64MMFR3, S1PIE, 8, 4) +FIELD(ID_AA64MMFR3, S2PIE, 12, 4) +FIELD(ID_AA64MMFR3, S1POE, 16, 4) +FIELD(ID_AA64MMFR3, S2POE, 20, 4) +FIELD(ID_AA64MMFR3, AIE, 24, 4) +FIELD(ID_AA64MMFR3, MEC, 28, 4) +FIELD(ID_AA64MMFR3, D128, 32, 4) +FIELD(ID_AA64MMFR3, D128_2, 36, 4) +FIELD(ID_AA64MMFR3, SNERR, 40, 4) +FIELD(ID_AA64MMFR3, ANERR, 44, 4) +FIELD(ID_AA64MMFR3, SDERR, 52, 4) +FIELD(ID_AA64MMFR3, ADERR, 56, 4) +FIELD(ID_AA64MMFR3, SPEC_FPACC, 60, 4) + FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) FIELD(ID_AA64DFR0, TRACEVER, 4, 4) FIELD(ID_AA64DFR0, PMUVER, 8, 4) @@ -2359,6 +2380,14 @@ enum arm_features { ARM_FEATURE_M_SECURITY, /* M profile Security Extension */ ARM_FEATURE_M_MAIN, /* M profile Main Extension */ ARM_FEATURE_V8_1M, /* M profile extras only in v8.1M and later */ + /* + * ARM_FEATURE_BACKCOMPAT_CNTFRQ makes the CPU default cntfrq be 62.5MHz + * if the board doesn't set a value, instead of 1GHz. It is for backwards + * compatibility and used only with CPU definitions that were already + * in QEMU before we changed the default. It should not be set on any + * CPU types added in future. + */ + ARM_FEATURE_BACKCOMPAT_CNTFRQ, /* 62.5MHz timer default */ }; static inline int arm_feature(CPUARMState *env, int feature) diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c index 985b1efe16..862d2b92fa 100644 --- a/target/arm/cpu64.c +++ b/target/arm/cpu64.c @@ -109,7 +109,11 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp) * No explicit bits enabled, and no implicit bits from sve-max-vq. */ if (!cpu_isar_feature(aa64_sve, cpu)) { - /* SVE is disabled and so are all vector lengths. Good. */ + /* + * SVE is disabled and so are all vector lengths. Good. + * Disable all SVE extensions as well. 
+ */ + cpu->isar.id_aa64zfr0 = 0; return; } @@ -599,6 +603,7 @@ static void aarch64_a57_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -656,6 +661,7 @@ static void aarch64_a53_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); diff --git a/target/arm/helper.c b/target/arm/helper.c index 6b224826fb..ce31957235 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -2474,6 +2474,13 @@ static const ARMCPRegInfo v6k_cp_reginfo[] = { .resetvalue = 0 }, }; +static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) +{ + ARMCPU *cpu = env_archcpu(env); + + cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq_hz; +} + #ifndef CONFIG_USER_ONLY static CPAccessResult gt_cntfrq_access(CPUARMState *env, const ARMCPRegInfo *ri, @@ -2658,7 +2665,7 @@ static CPAccessResult gt_stimer_access(CPUARMState *env, } } -static uint64_t gt_get_countervalue(CPUARMState *env) +uint64_t gt_get_countervalue(CPUARMState *env) { ARMCPU *cpu = env_archcpu(env); @@ -2793,7 +2800,7 @@ static uint64_t gt_cnt_read(CPUARMState *env, const ARMCPRegInfo *ri) return gt_get_countervalue(env) - gt_phys_cnt_offset(env); } -static uint64_t gt_virt_cnt_offset(CPUARMState *env) +uint64_t gt_virt_cnt_offset(CPUARMState *env) { uint64_t hcr; @@ -3228,13 +3235,6 @@ void arm_gt_hvtimer_cb(void *opaque) gt_recalc_timer(cpu, GTIMER_HYPVIRT); } -static void arm_gt_cntfrq_reset(CPUARMState *env, const ARMCPRegInfo *opaque) -{ - ARMCPU *cpu = env_archcpu(env); - - cpu->env.cp15.c14_cntfrq = cpu->gt_cntfrq_hz; -} - static const ARMCPRegInfo generic_timer_cp_reginfo[] = { /* * Note that CNTFRQ is purely reads-as-written for the benefit @@ -3514,7 +3514,7 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = { .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 0, .type = ARM_CP_CONST, .access = PL0_R /* no PL1_RW in linux-user */, .fieldoffset = offsetof(CPUARMState, cp15.c14_cntfrq), - .resetvalue = NANOSECONDS_PER_SECOND / GTIMER_SCALE, + .resetfn = arm_gt_cntfrq_reset, }, { .name = "CNTVCT_EL0", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 3, .crn = 14, .crm = 0, .opc2 = 2, @@ -9004,11 +9004,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, .resetvalue = cpu->isar.id_aa64mmfr2 }, - { .name = "ID_AA64MMFR3_EL1_RESERVED", .state = ARM_CP_STATE_AA64, + { .name = "ID_AA64MMFR3_EL1", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 3, .access = PL1_R, .type = ARM_CP_CONST, .accessfn = access_aa64_tid3, - .resetvalue = 0 }, + .resetvalue = cpu->isar.id_aa64mmfr3 }, { .name = "ID_AA64MMFR4_EL1_RESERVED", .state = ARM_CP_STATE_AA64, .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 7, .opc2 = 4, .access = PL1_R, .type = ARM_CP_CONST, @@ -9165,6 +9165,8 @@ void register_cp_regs_for_features(ARMCPU *cpu) .exported_bits = R_ID_AA64MMFR1_AFP_MASK }, { .name = "ID_AA64MMFR2_EL1", .exported_bits = R_ID_AA64MMFR2_AT_MASK }, + { .name = "ID_AA64MMFR3_EL1", + .exported_bits 
= 0 }, { .name = "ID_AA64MMFR*_EL1_RESERVED", .is_glob = true }, { .name = "ID_AA64DFR0_EL1", diff --git a/target/arm/helper.h b/target/arm/helper.h index 2b02733305..eca2043fc2 100644 --- a/target/arm/helper.h +++ b/target/arm/helper.h @@ -53,6 +53,7 @@ DEF_HELPER_2(exception_pc_alignment, noreturn, env, tl) DEF_HELPER_1(setend, void, env) DEF_HELPER_2(wfi, void, env, i32) DEF_HELPER_1(wfe, void, env) +DEF_HELPER_2(wfit, void, env, i64) DEF_HELPER_1(yield, void, env) DEF_HELPER_1(pre_hvc, void, env) DEF_HELPER_2(pre_smc, void, env, i32) @@ -132,12 +133,6 @@ DEF_HELPER_3(vfp_maxnumd, f64, f64, f64, ptr) DEF_HELPER_3(vfp_minnumh, f16, f16, f16, ptr) DEF_HELPER_3(vfp_minnums, f32, f32, f32, ptr) DEF_HELPER_3(vfp_minnumd, f64, f64, f64, ptr) -DEF_HELPER_1(vfp_negh, f16, f16) -DEF_HELPER_1(vfp_negs, f32, f32) -DEF_HELPER_1(vfp_negd, f64, f64) -DEF_HELPER_1(vfp_absh, f16, f16) -DEF_HELPER_1(vfp_abss, f32, f32) -DEF_HELPER_1(vfp_absd, f64, f64) DEF_HELPER_2(vfp_sqrth, f16, f16, env) DEF_HELPER_2(vfp_sqrts, f32, f32, env) DEF_HELPER_2(vfp_sqrtd, f64, f64, env) @@ -274,50 +269,6 @@ DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr) DEF_HELPER_FLAGS_3(check_hcr_el2_trap, TCG_CALL_NO_WG, void, env, i32, i32) /* neon_helper.c */ -DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64) -DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32) -DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64) -DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32) -DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32) -DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64) -DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64) -DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64) - -DEF_HELPER_2(neon_hadd_s8, i32, i32, i32) -DEF_HELPER_2(neon_hadd_u8, i32, i32, i32) -DEF_HELPER_2(neon_hadd_s16, i32, i32, i32) -DEF_HELPER_2(neon_hadd_u16, i32, i32, i32) -DEF_HELPER_2(neon_hadd_s32, s32, s32, s32) -DEF_HELPER_2(neon_hadd_u32, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_s8, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_u8, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_s16, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_u16, i32, i32, i32) -DEF_HELPER_2(neon_rhadd_s32, s32, s32, s32) -DEF_HELPER_2(neon_rhadd_u32, i32, i32, i32) -DEF_HELPER_2(neon_hsub_s8, i32, i32, i32) -DEF_HELPER_2(neon_hsub_u8, i32, i32, i32) -DEF_HELPER_2(neon_hsub_s16, i32, i32, i32) -DEF_HELPER_2(neon_hsub_u16, i32, i32, i32) -DEF_HELPER_2(neon_hsub_s32, s32, s32, s32) -DEF_HELPER_2(neon_hsub_u32, i32, i32, i32) - 
DEF_HELPER_2(neon_pmin_u8, i32, i32, i32) DEF_HELPER_2(neon_pmin_s8, i32, i32, i32) DEF_HELPER_2(neon_pmin_u16, i32, i32, i32) @@ -357,11 +308,35 @@ DEF_HELPER_3(neon_qrshl_u32, i32, env, i32, i32) DEF_HELPER_3(neon_qrshl_s32, i32, env, i32, i32) DEF_HELPER_3(neon_qrshl_u64, i64, env, i64, i64) DEF_HELPER_3(neon_qrshl_s64, i64, env, i64, i64) +DEF_HELPER_FLAGS_5(neon_sqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_uqrshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_srshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_srshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_urshl_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_urshl_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_2(neon_add_u8, i32, i32, i32) DEF_HELPER_2(neon_add_u16, i32, i32, i32) -DEF_HELPER_2(neon_padd_u8, i32, i32, i32) -DEF_HELPER_2(neon_padd_u16, i32, i32, i32) DEF_HELPER_2(neon_sub_u8, i32, i32, i32) DEF_HELPER_2(neon_sub_u16, i32, i32, i32) DEF_HELPER_2(neon_mul_u8, i32, i32, i32) @@ -656,13 +631,6 @@ DEF_HELPER_FLAGS_6(gvec_fcmlas_idx, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_6(gvec_fcmlad, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_paddh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pmaxh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pminh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_padds, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pmaxs, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) -DEF_HELPER_FLAGS_5(neon_pmins, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) - DEF_HELPER_FLAGS_4(gvec_sstoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_sitos, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(gvec_ustoh, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -730,33 +698,43 @@ DEF_HELPER_FLAGS_5(gvec_fmul_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) 
DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fcge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fcgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fcgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facge_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facge_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_facge_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facgt_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_facgt_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_facgt_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmax_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmax_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmax_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmin_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmin_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmin_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmaxnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmaxnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fminnum_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fminnum_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnum_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_recps_nf_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_recps_nf_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -772,9 +750,11 @@ DEF_HELPER_FLAGS_5(gvec_fmls_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfma_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfma_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_vfma_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -839,6 +819,22 @@ DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_5(gvec_usqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_usqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_b, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_suqadd_d, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(gvec_fmlal_a32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) @@ -973,6 +969,16 @@ DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG, + void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) @@ -1042,6 +1048,47 @@ DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG, DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_faddp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmaxnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmaxnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fminnump_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnump_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fminnump_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_addp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_addp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_smaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_smaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_sminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_sminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) 
+DEF_HELPER_FLAGS_4(gvec_sminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_umaxp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umaxp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_umaxp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_4(gvec_uminp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uminp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_4(gvec_uminp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) + #ifdef TARGET_AARCH64 #include "tcg/helper-a64.h" #include "tcg/helper-sve.h" diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c index 65a5601804..45e2218be5 100644 --- a/target/arm/hvf/hvf.c +++ b/target/arm/hvf/hvf.c @@ -150,7 +150,6 @@ void hvf_arm_init_debug(void) #define HVF_SYSREG(crn, crm, op0, op1, op2) \ ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP, crn, crm, op0, op1, op2) -#define PL1_WRITE_MASK 0x4 #define SYSREG_OP0_SHIFT 20 #define SYSREG_OP0_MASK 0x3 @@ -397,85 +396,85 @@ struct hvf_sreg_match { }; static struct hvf_sreg_match hvf_sreg_match[] = { - { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR10_EL1, 
HVF_SYSREG(0, 10, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 14, 0, 7) }, - - { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 4) }, - { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 5) }, - { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 14, 0, 6) }, - { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 14, 0, 7) }, + { HV_SYS_REG_DBGBVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR0_EL1, HVF_SYSREG(0, 0, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR0_EL1, HVF_SYSREG(0, 0, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR1_EL1, HVF_SYSREG(0, 1, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR1_EL1, HVF_SYSREG(0, 1, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR2_EL1, HVF_SYSREG(0, 2, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR2_EL1, HVF_SYSREG(0, 2, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR3_EL1, HVF_SYSREG(0, 3, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR3_EL1, HVF_SYSREG(0, 3, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR4_EL1, HVF_SYSREG(0, 4, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR4_EL1, HVF_SYSREG(0, 4, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR5_EL1, HVF_SYSREG(0, 5, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR5_EL1, HVF_SYSREG(0, 5, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR6_EL1, HVF_SYSREG(0, 6, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR6_EL1, HVF_SYSREG(0, 6, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR7_EL1, HVF_SYSREG(0, 7, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR7_EL1, HVF_SYSREG(0, 7, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR8_EL1, HVF_SYSREG(0, 8, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR8_EL1, HVF_SYSREG(0, 8, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 5) }, + { 
HV_SYS_REG_DBGWVR9_EL1, HVF_SYSREG(0, 9, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR9_EL1, HVF_SYSREG(0, 9, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR10_EL1, HVF_SYSREG(0, 10, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR10_EL1, HVF_SYSREG(0, 10, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR11_EL1, HVF_SYSREG(0, 11, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR11_EL1, HVF_SYSREG(0, 11, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR12_EL1, HVF_SYSREG(0, 12, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR12_EL1, HVF_SYSREG(0, 12, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR13_EL1, HVF_SYSREG(0, 13, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR13_EL1, HVF_SYSREG(0, 13, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR14_EL1, HVF_SYSREG(0, 14, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR14_EL1, HVF_SYSREG(0, 14, 2, 0, 7) }, + + { HV_SYS_REG_DBGBVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 4) }, + { HV_SYS_REG_DBGBCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 5) }, + { HV_SYS_REG_DBGWVR15_EL1, HVF_SYSREG(0, 15, 2, 0, 6) }, + { HV_SYS_REG_DBGWCR15_EL1, HVF_SYSREG(0, 15, 2, 0, 7) }, #ifdef SYNC_NO_RAW_REGS /* @@ -487,7 +486,7 @@ static struct hvf_sreg_match hvf_sreg_match[] = { { HV_SYS_REG_MPIDR_EL1, HVF_SYSREG(0, 0, 3, 0, 5) }, { HV_SYS_REG_ID_AA64PFR0_EL1, HVF_SYSREG(0, 4, 3, 0, 0) }, #endif - { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 2) }, + { HV_SYS_REG_ID_AA64PFR1_EL1, HVF_SYSREG(0, 4, 3, 0, 1) }, { HV_SYS_REG_ID_AA64DFR0_EL1, HVF_SYSREG(0, 5, 3, 0, 0) }, { HV_SYS_REG_ID_AA64DFR1_EL1, HVF_SYSREG(0, 5, 3, 0, 1) }, { HV_SYS_REG_ID_AA64ISAR0_EL1, HVF_SYSREG(0, 6, 3, 0, 0) }, @@ -498,6 +497,7 @@ static struct hvf_sreg_match hvf_sreg_match[] = { #endif { HV_SYS_REG_ID_AA64MMFR1_EL1, HVF_SYSREG(0, 7, 3, 0, 1) }, { HV_SYS_REG_ID_AA64MMFR2_EL1, HVF_SYSREG(0, 7, 3, 0, 2) }, + /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ { HV_SYS_REG_MDSCR_EL1, HVF_SYSREG(0, 2, 2, 0, 2) }, { HV_SYS_REG_SCTLR_EL1, HVF_SYSREG(1, 0, 3, 0, 0) }, @@ -806,9 +806,9 @@ int hvf_put_registers(CPUState *cpu) static void flush_cpu_state(CPUState *cpu) { - if (cpu->vcpu_dirty) { + if (cpu->accel->dirty) { hvf_put_registers(cpu); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } } @@ -856,6 +856,7 @@ static bool hvf_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) { HV_SYS_REG_ID_AA64MMFR0_EL1, &host_isar.id_aa64mmfr0 }, { HV_SYS_REG_ID_AA64MMFR1_EL1, &host_isar.id_aa64mmfr1 }, { HV_SYS_REG_ID_AA64MMFR2_EL1, &host_isar.id_aa64mmfr2 }, + /* Add ID_AA64MMFR3_EL1 here when HVF supports it */ }; hv_vcpu_t fd; hv_return_t r = HV_SUCCESS; diff --git a/target/arm/internals.h b/target/arm/internals.h index b53f5e8ff2..11b5da2562 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -25,6 +25,7 @@ #ifndef TARGET_ARM_INTERNALS_H #define TARGET_ARM_INTERNALS_H +#include "exec/breakpoint.h" #include "hw/registerfields.h" #include "tcg/tcg-gvec-desc.h" #include "syndrome.h" @@ -59,10 +60,19 @@ static inline bool excp_is_internal(int excp) || excp == EXCP_SEMIHOST; } -/* Scale factor for generic timers, ie number of ns per tick. - * This gives a 62.5MHz timer. 
+/* + * Default frequency for the generic timer, in Hz. + * ARMv8.6 and later CPUs architecturally must use a 1GHz timer; before + * that it was an IMPDEF choice, and QEMU initially picked 62.5MHz, + * which gives a 16ns tick period. + * + * We will use the back-compat value: + * - for QEMU CPU types added before we standardized on 1GHz + * - for versioned machine types with a version of 9.0 or earlier + * In any case, the machine model may override via the cntfrq property. */ -#define GTIMER_SCALE 16 +#define GTIMER_DEFAULT_HZ 1000000000 +#define GTIMER_BACKCOMPAT_HZ 62500000 /* Bit definitions for the v7M CONTROL register */ FIELD(V7M_CONTROL, NPRIV, 0, 1) @@ -1760,4 +1770,12 @@ bool check_watchpoint_in_range(int i, target_ulong addr); CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr); int insert_hw_watchpoint(target_ulong addr, target_ulong len, int type); int delete_hw_watchpoint(target_ulong addr, target_ulong len, int type); + +/* Return the current value of the system counter in ticks */ +uint64_t gt_get_countervalue(CPUARMState *env); +/* + * Return the currently applicable offset between the system counter + * and CNTVCT_EL0 (this will be either 0 or the value of CNTVOFF_EL2). + */ +uint64_t gt_virt_cnt_offset(CPUARMState *env); #endif diff --git a/target/arm/kvm-consts.h b/target/arm/kvm-consts.h index 7c6adc14f6..c44d23dbe7 100644 --- a/target/arm/kvm-consts.h +++ b/target/arm/kvm-consts.h @@ -14,13 +14,13 @@ #ifndef ARM_KVM_CONSTS_H #define ARM_KVM_CONSTS_H -#ifdef NEED_CPU_H +#ifdef COMPILING_PER_TARGET #ifdef CONFIG_KVM #include <linux/kvm.h> #include <linux/psci.h> #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(X != Y) #endif -#endif +#endif /* COMPILING_PER_TARGET */ #ifndef MISMATCH_CHECK #define MISMATCH_CHECK(X, Y) QEMU_BUILD_BUG_ON(0) diff --git a/target/arm/kvm.c b/target/arm/kvm.c index 21ebbf3b8f..7cf5cf31de 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -331,6 +331,8 @@ static bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf) ARM64_SYS_REG(3, 0, 0, 7, 1)); err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2, ARM64_SYS_REG(3, 0, 0, 7, 2)); + err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr3, + ARM64_SYS_REG(3, 0, 0, 7, 3)); /* * Note that if AArch32 support is not present in the host, diff --git a/target/arm/machine.c b/target/arm/machine.c index b2b39b2475..0a722ca7e7 100644 --- a/target/arm/machine.c +++ b/target/arm/machine.c @@ -242,6 +242,25 @@ static const VMStateDescription vmstate_irq_line_state = { } }; +static bool wfxt_timer_needed(void *opaque) +{ + ARMCPU *cpu = opaque; + + /* We'll only have the timer object if FEAT_WFxT is implemented */ + return cpu->wfxt_timer; +} + +static const VMStateDescription vmstate_wfxt_timer = { + .name = "cpu/wfxt-timer", + .version_id = 1, + .minimum_version_id = 1, + .needed = wfxt_timer_needed, + .fields = (const VMStateField[]) { + VMSTATE_TIMER_PTR(wfxt_timer, ARMCPU), + VMSTATE_END_OF_LIST() + } +}; + static bool m_needed(void *opaque) { ARMCPU *cpu = opaque; @@ -957,6 +976,7 @@ const VMStateDescription vmstate_arm_cpu = { #endif &vmstate_serror, &vmstate_irq_line_state, + &vmstate_wfxt_timer, NULL } }; diff --git a/target/arm/ptw.c b/target/arm/ptw.c index 31ae43f60e..4476b32ff5 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -11,6 +11,7 @@ #include "qemu/range.h" #include "qemu/main-loop.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "cpu.h" #include "internals.h" #include "cpu-features.h" diff --git a/target/arm/tcg/a64.decode 
b/target/arm/tcg/a64.decode index 0e7656fd15..2b7a3254a0 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -19,11 +19,58 @@ # This file is processed by scripts/decodetree.py # -&r rn -&ri rd imm -&rri_sf rd rn imm sf -&i imm - +%rd 0:5 +%esz_sd 22:1 !function=plus_2 +%esz_hsd 22:2 !function=xor_2 +%hl 11:1 21:1 +%hlm 11:1 20:2 + +&r rn +&ri rd imm +&rri_sf rd rn imm sf +&i imm +&rr_e rd rn esz +&rrr_e rd rn rm esz +&rrx_e rd rn rm idx esz +&rrrr_e rd rn rm ra esz +&qrr_e q rd rn esz +&qrrr_e q rd rn rm esz +&qrrx_e q rd rn rm idx esz +&qrrrr_e q rd rn rm ra esz + +@rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1 +@rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3 +@rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd + +@rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 +@rrr_d ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=3 +@rrr_sd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_sd +@rrr_hsd ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=%esz_hsd +@rrr_e ........ esz:2 . rm:5 ...... rn:5 rd:5 &rrr_e +@r2r_e ........ esz:2 . ..... ...... rm:5 rd:5 &rrr_e rn=%rd + +@rrx_h ........ .. .. rm:4 .... . . rn:5 rd:5 &rrx_e esz=1 idx=%hlm +@rrx_s ........ .. . rm:5 .... . . rn:5 rd:5 &rrx_e esz=2 idx=%hl +@rrx_d ........ .. . rm:5 .... idx:1 . rn:5 rd:5 &rrx_e esz=3 + +@rr_q1e0 ........ ........ ...... rn:5 rd:5 &qrr_e q=1 esz=0 +@r2r_q1e0 ........ ........ ...... rm:5 rd:5 &qrrr_e rn=%rd q=1 esz=0 +@rrr_q1e0 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=0 +@rrr_q1e3 ........ ... rm:5 ...... rn:5 rd:5 &qrrr_e q=1 esz=3 +@rrrr_q1e3 ........ ... rm:5 . ra:5 rn:5 rd:5 &qrrrr_e q=1 esz=3 + +@qrrr_b . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=0 +@qrrr_h . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=1 +@qrrr_sd . q:1 ...... ... rm:5 ...... rn:5 rd:5 &qrrr_e esz=%esz_sd +@qrrr_e . q:1 ...... esz:2 . rm:5 ...... rn:5 rd:5 &qrrr_e +@qr2r_e . q:1 ...... esz:2 . ..... ...... rm:5 rd:5 &qrrr_e rn=%rd + +@qrrx_h . q:1 .. .... .. .. rm:4 .... . . rn:5 rd:5 \ + &qrrx_e esz=1 idx=%hlm +@qrrx_s . q:1 .. .... .. . rm:5 .... . . rn:5 rd:5 \ + &qrrx_e esz=2 idx=%hl +@qrrx_d . q:1 .. .... .. . rm:5 .... idx:1 . rn:5 rd:5 \ + &qrrx_e esz=3 ### Data Processing - Immediate @@ -183,6 +230,10 @@ ERETA 1101011 0100 11111 00001 m:1 11111 11111 &reta # ERETAA, ERETAB NOP 1101 0101 0000 0011 0010 ---- --- 11111 } +# System instructions with register argument +WFET 1101 0101 0000 0011 0001 0000 000 rd:5 +WFIT 1101 0101 0000 0011 0001 0000 001 rd:5 + # Barriers CLREX 1101 0101 0000 0011 0011 ---- 010 11111 @@ -590,3 +641,378 @@ CPYFE 00 011 0 01100 ..... .... 01 ..... ..... @cpy CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy + +### Cryptographic AES + +AESE 01001110 00 10100 00100 10 ..... ..... @r2r_q1e0 +AESD 01001110 00 10100 00101 10 ..... ..... @r2r_q1e0 +AESMC 01001110 00 10100 00110 10 ..... ..... @rr_q1e0 +AESIMC 01001110 00 10100 00111 10 ..... ..... @rr_q1e0 + +### Cryptographic three-register SHA + +SHA1C 0101 1110 000 ..... 000000 ..... ..... @rrr_q1e0 +SHA1P 0101 1110 000 ..... 000100 ..... ..... @rrr_q1e0 +SHA1M 0101 1110 000 ..... 001000 ..... ..... @rrr_q1e0 +SHA1SU0 0101 1110 000 ..... 001100 ..... ..... @rrr_q1e0 +SHA256H 0101 1110 000 ..... 010000 ..... ..... @rrr_q1e0 +SHA256H2 0101 1110 000 ..... 010100 ..... ..... @rrr_q1e0 +SHA256SU1 0101 1110 000 ..... 011000 ..... ..... 
@rrr_q1e0 + +### Cryptographic two-register SHA + +SHA1H 0101 1110 0010 1000 0000 10 ..... ..... @rr_q1e0 +SHA1SU1 0101 1110 0010 1000 0001 10 ..... ..... @rr_q1e0 +SHA256SU0 0101 1110 0010 1000 0010 10 ..... ..... @rr_q1e0 + +### Cryptographic three-register SHA512 + +SHA512H 1100 1110 011 ..... 100000 ..... ..... @rrr_q1e0 +SHA512H2 1100 1110 011 ..... 100001 ..... ..... @rrr_q1e0 +SHA512SU1 1100 1110 011 ..... 100010 ..... ..... @rrr_q1e0 +RAX1 1100 1110 011 ..... 100011 ..... ..... @rrr_q1e3 +SM3PARTW1 1100 1110 011 ..... 110000 ..... ..... @rrr_q1e0 +SM3PARTW2 1100 1110 011 ..... 110001 ..... ..... @rrr_q1e0 +SM4EKEY 1100 1110 011 ..... 110010 ..... ..... @rrr_q1e0 + +### Cryptographic two-register SHA512 + +SHA512SU0 1100 1110 110 00000 100000 ..... ..... @rr_q1e0 +SM4E 1100 1110 110 00000 100001 ..... ..... @r2r_q1e0 + +### Cryptographic four-register + +EOR3 1100 1110 000 ..... 0 ..... ..... ..... @rrrr_q1e3 +BCAX 1100 1110 001 ..... 0 ..... ..... ..... @rrrr_q1e3 +SM3SS1 1100 1110 010 ..... 0 ..... ..... ..... @rrrr_q1e3 + +### Cryptographic three-register, imm2 + +&crypto3i rd rn rm imm +@crypto3i ........ ... rm:5 .. imm:2 .. rn:5 rd:5 &crypto3i + +SM3TT1A 11001110 010 ..... 10 .. 00 ..... ..... @crypto3i +SM3TT1B 11001110 010 ..... 10 .. 01 ..... ..... @crypto3i +SM3TT2A 11001110 010 ..... 10 .. 10 ..... ..... @crypto3i +SM3TT2B 11001110 010 ..... 10 .. 11 ..... ..... @crypto3i + +### Cryptographic XAR + +XAR 1100 1110 100 rm:5 imm:6 rn:5 rd:5 + +### Advanced SIMD scalar copy + +DUP_element_s 0101 1110 000 imm:5 0 0000 1 rn:5 rd:5 + +### Advanced SIMD copy + +DUP_element_v 0 q:1 00 1110 000 imm:5 0 0000 1 rn:5 rd:5 +DUP_general 0 q:1 00 1110 000 imm:5 0 0001 1 rn:5 rd:5 +INS_general 0 1 00 1110 000 imm:5 0 0011 1 rn:5 rd:5 +SMOV 0 q:1 00 1110 000 imm:5 0 0101 1 rn:5 rd:5 +UMOV 0 q:1 00 1110 000 imm:5 0 0111 1 rn:5 rd:5 +INS_element 0 1 10 1110 000 di:5 0 si:4 1 rn:5 rd:5 + +### Advanced SIMD scalar three same + +FADD_s 0001 1110 ..1 ..... 0010 10 ..... ..... @rrr_hsd +FSUB_s 0001 1110 ..1 ..... 0011 10 ..... ..... @rrr_hsd +FDIV_s 0001 1110 ..1 ..... 0001 10 ..... ..... @rrr_hsd +FMUL_s 0001 1110 ..1 ..... 0000 10 ..... ..... @rrr_hsd +FNMUL_s 0001 1110 ..1 ..... 1000 10 ..... ..... @rrr_hsd + +FMAX_s 0001 1110 ..1 ..... 0100 10 ..... ..... @rrr_hsd +FMIN_s 0001 1110 ..1 ..... 0101 10 ..... ..... @rrr_hsd +FMAXNM_s 0001 1110 ..1 ..... 0110 10 ..... ..... @rrr_hsd +FMINNM_s 0001 1110 ..1 ..... 0111 10 ..... ..... @rrr_hsd + +FMULX_s 0101 1110 010 ..... 00011 1 ..... ..... @rrr_h +FMULX_s 0101 1110 0.1 ..... 11011 1 ..... ..... @rrr_sd + +FCMEQ_s 0101 1110 010 ..... 00100 1 ..... ..... @rrr_h +FCMEQ_s 0101 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd + +FCMGE_s 0111 1110 010 ..... 00100 1 ..... ..... @rrr_h +FCMGE_s 0111 1110 0.1 ..... 11100 1 ..... ..... @rrr_sd + +FCMGT_s 0111 1110 110 ..... 00100 1 ..... ..... @rrr_h +FCMGT_s 0111 1110 1.1 ..... 11100 1 ..... ..... @rrr_sd + +FACGE_s 0111 1110 010 ..... 00101 1 ..... ..... @rrr_h +FACGE_s 0111 1110 0.1 ..... 11101 1 ..... ..... @rrr_sd + +FACGT_s 0111 1110 110 ..... 00101 1 ..... ..... @rrr_h +FACGT_s 0111 1110 1.1 ..... 11101 1 ..... ..... @rrr_sd + +FABD_s 0111 1110 110 ..... 00010 1 ..... ..... @rrr_h +FABD_s 0111 1110 1.1 ..... 11010 1 ..... ..... @rrr_sd + +FRECPS_s 0101 1110 010 ..... 00111 1 ..... ..... @rrr_h +FRECPS_s 0101 1110 0.1 ..... 11111 1 ..... ..... @rrr_sd + +FRSQRTS_s 0101 1110 110 ..... 00111 1 ..... ..... @rrr_h +FRSQRTS_s 0101 1110 1.1 ..... 11111 1 ..... ..... @rrr_sd + +SQADD_s 0101 1110 ..1 ..... 
00001 1 ..... ..... @rrr_e +UQADD_s 0111 1110 ..1 ..... 00001 1 ..... ..... @rrr_e +SQSUB_s 0101 1110 ..1 ..... 00101 1 ..... ..... @rrr_e +UQSUB_s 0111 1110 ..1 ..... 00101 1 ..... ..... @rrr_e + +SUQADD_s 0101 1110 ..1 00000 00111 0 ..... ..... @r2r_e +USQADD_s 0111 1110 ..1 00000 00111 0 ..... ..... @r2r_e + +SSHL_s 0101 1110 111 ..... 01000 1 ..... ..... @rrr_d +USHL_s 0111 1110 111 ..... 01000 1 ..... ..... @rrr_d +SRSHL_s 0101 1110 111 ..... 01010 1 ..... ..... @rrr_d +URSHL_s 0111 1110 111 ..... 01010 1 ..... ..... @rrr_d +SQSHL_s 0101 1110 ..1 ..... 01001 1 ..... ..... @rrr_e +UQSHL_s 0111 1110 ..1 ..... 01001 1 ..... ..... @rrr_e +SQRSHL_s 0101 1110 ..1 ..... 01011 1 ..... ..... @rrr_e +UQRSHL_s 0111 1110 ..1 ..... 01011 1 ..... ..... @rrr_e + +ADD_s 0101 1110 111 ..... 10000 1 ..... ..... @rrr_d +SUB_s 0111 1110 111 ..... 10000 1 ..... ..... @rrr_d +CMGT_s 0101 1110 111 ..... 00110 1 ..... ..... @rrr_d +CMHI_s 0111 1110 111 ..... 00110 1 ..... ..... @rrr_d +CMGE_s 0101 1110 111 ..... 00111 1 ..... ..... @rrr_d +CMHS_s 0111 1110 111 ..... 00111 1 ..... ..... @rrr_d +CMTST_s 0101 1110 111 ..... 10001 1 ..... ..... @rrr_d +CMEQ_s 0111 1110 111 ..... 10001 1 ..... ..... @rrr_d + +SQDMULH_s 0101 1110 ..1 ..... 10110 1 ..... ..... @rrr_e +SQRDMULH_s 0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e + +### Advanced SIMD scalar pairwise + +FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h +FADDP_s 0111 1110 0.11 0000 1101 10 ..... ..... @rr_sd + +FMAXP_s 0101 1110 0011 0000 1111 10 ..... ..... @rr_h +FMAXP_s 0111 1110 0.11 0000 1111 10 ..... ..... @rr_sd + +FMINP_s 0101 1110 1011 0000 1111 10 ..... ..... @rr_h +FMINP_s 0111 1110 1.11 0000 1111 10 ..... ..... @rr_sd + +FMAXNMP_s 0101 1110 0011 0000 1100 10 ..... ..... @rr_h +FMAXNMP_s 0111 1110 0.11 0000 1100 10 ..... ..... @rr_sd + +FMINNMP_s 0101 1110 1011 0000 1100 10 ..... ..... @rr_h +FMINNMP_s 0111 1110 1.11 0000 1100 10 ..... ..... @rr_sd + +ADDP_s 0101 1110 1111 0001 1011 10 ..... ..... @rr_d + +### Advanced SIMD three same + +FADD_v 0.00 1110 010 ..... 00010 1 ..... ..... @qrrr_h +FADD_v 0.00 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd + +FSUB_v 0.00 1110 110 ..... 00010 1 ..... ..... @qrrr_h +FSUB_v 0.00 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd + +FDIV_v 0.10 1110 010 ..... 00111 1 ..... ..... @qrrr_h +FDIV_v 0.10 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd + +FMUL_v 0.10 1110 010 ..... 00011 1 ..... ..... @qrrr_h +FMUL_v 0.10 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd + +FMAX_v 0.00 1110 010 ..... 00110 1 ..... ..... @qrrr_h +FMAX_v 0.00 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMIN_v 0.00 1110 110 ..... 00110 1 ..... ..... @qrrr_h +FMIN_v 0.00 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMAXNM_v 0.00 1110 010 ..... 00000 1 ..... ..... @qrrr_h +FMAXNM_v 0.00 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd + +FMINNM_v 0.00 1110 110 ..... 00000 1 ..... ..... @qrrr_h +FMINNM_v 0.00 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd + +FMULX_v 0.00 1110 010 ..... 00011 1 ..... ..... @qrrr_h +FMULX_v 0.00 1110 0.1 ..... 11011 1 ..... ..... @qrrr_sd + +FMLA_v 0.00 1110 010 ..... 00001 1 ..... ..... @qrrr_h +FMLA_v 0.00 1110 0.1 ..... 11001 1 ..... ..... @qrrr_sd + +FMLS_v 0.00 1110 110 ..... 00001 1 ..... ..... @qrrr_h +FMLS_v 0.00 1110 1.1 ..... 11001 1 ..... ..... @qrrr_sd + +FMLAL_v 0.00 1110 001 ..... 11101 1 ..... ..... @qrrr_h +FMLSL_v 0.00 1110 101 ..... 11101 1 ..... ..... @qrrr_h +FMLAL2_v 0.10 1110 001 ..... 11001 1 ..... ..... @qrrr_h +FMLSL2_v 0.10 1110 101 ..... 11001 1 ..... ..... 
@qrrr_h + +FCMEQ_v 0.00 1110 010 ..... 00100 1 ..... ..... @qrrr_h +FCMEQ_v 0.00 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd + +FCMGE_v 0.10 1110 010 ..... 00100 1 ..... ..... @qrrr_h +FCMGE_v 0.10 1110 0.1 ..... 11100 1 ..... ..... @qrrr_sd + +FCMGT_v 0.10 1110 110 ..... 00100 1 ..... ..... @qrrr_h +FCMGT_v 0.10 1110 1.1 ..... 11100 1 ..... ..... @qrrr_sd + +FACGE_v 0.10 1110 010 ..... 00101 1 ..... ..... @qrrr_h +FACGE_v 0.10 1110 0.1 ..... 11101 1 ..... ..... @qrrr_sd + +FACGT_v 0.10 1110 110 ..... 00101 1 ..... ..... @qrrr_h +FACGT_v 0.10 1110 1.1 ..... 11101 1 ..... ..... @qrrr_sd + +FABD_v 0.10 1110 110 ..... 00010 1 ..... ..... @qrrr_h +FABD_v 0.10 1110 1.1 ..... 11010 1 ..... ..... @qrrr_sd + +FRECPS_v 0.00 1110 010 ..... 00111 1 ..... ..... @qrrr_h +FRECPS_v 0.00 1110 0.1 ..... 11111 1 ..... ..... @qrrr_sd + +FRSQRTS_v 0.00 1110 110 ..... 00111 1 ..... ..... @qrrr_h +FRSQRTS_v 0.00 1110 1.1 ..... 11111 1 ..... ..... @qrrr_sd + +FADDP_v 0.10 1110 010 ..... 00010 1 ..... ..... @qrrr_h +FADDP_v 0.10 1110 0.1 ..... 11010 1 ..... ..... @qrrr_sd + +FMAXP_v 0.10 1110 010 ..... 00110 1 ..... ..... @qrrr_h +FMAXP_v 0.10 1110 0.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMINP_v 0.10 1110 110 ..... 00110 1 ..... ..... @qrrr_h +FMINP_v 0.10 1110 1.1 ..... 11110 1 ..... ..... @qrrr_sd + +FMAXNMP_v 0.10 1110 010 ..... 00000 1 ..... ..... @qrrr_h +FMAXNMP_v 0.10 1110 0.1 ..... 11000 1 ..... ..... @qrrr_sd + +FMINNMP_v 0.10 1110 110 ..... 00000 1 ..... ..... @qrrr_h +FMINNMP_v 0.10 1110 1.1 ..... 11000 1 ..... ..... @qrrr_sd + +ADDP_v 0.00 1110 ..1 ..... 10111 1 ..... ..... @qrrr_e +SMAXP_v 0.00 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e +SMINP_v 0.00 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e +UMAXP_v 0.10 1110 ..1 ..... 10100 1 ..... ..... @qrrr_e +UMINP_v 0.10 1110 ..1 ..... 10101 1 ..... ..... @qrrr_e + +AND_v 0.00 1110 001 ..... 00011 1 ..... ..... @qrrr_b +BIC_v 0.00 1110 011 ..... 00011 1 ..... ..... @qrrr_b +ORR_v 0.00 1110 101 ..... 00011 1 ..... ..... @qrrr_b +ORN_v 0.00 1110 111 ..... 00011 1 ..... ..... @qrrr_b +EOR_v 0.10 1110 001 ..... 00011 1 ..... ..... @qrrr_b +BSL_v 0.10 1110 011 ..... 00011 1 ..... ..... @qrrr_b +BIT_v 0.10 1110 101 ..... 00011 1 ..... ..... @qrrr_b +BIF_v 0.10 1110 111 ..... 00011 1 ..... ..... @qrrr_b + +SQADD_v 0.00 1110 ..1 ..... 00001 1 ..... ..... @qrrr_e +UQADD_v 0.10 1110 ..1 ..... 00001 1 ..... ..... @qrrr_e +SQSUB_v 0.00 1110 ..1 ..... 00101 1 ..... ..... @qrrr_e +UQSUB_v 0.10 1110 ..1 ..... 00101 1 ..... ..... @qrrr_e + +SUQADD_v 0.00 1110 ..1 00000 00111 0 ..... ..... @qr2r_e +USQADD_v 0.10 1110 ..1 00000 00111 0 ..... ..... @qr2r_e + +SSHL_v 0.00 1110 ..1 ..... 01000 1 ..... ..... @qrrr_e +USHL_v 0.10 1110 ..1 ..... 01000 1 ..... ..... @qrrr_e +SRSHL_v 0.00 1110 ..1 ..... 01010 1 ..... ..... @qrrr_e +URSHL_v 0.10 1110 ..1 ..... 01010 1 ..... ..... @qrrr_e +SQSHL_v 0.00 1110 ..1 ..... 01001 1 ..... ..... @qrrr_e +UQSHL_v 0.10 1110 ..1 ..... 01001 1 ..... ..... @qrrr_e +SQRSHL_v 0.00 1110 ..1 ..... 01011 1 ..... ..... @qrrr_e +UQRSHL_v 0.10 1110 ..1 ..... 01011 1 ..... ..... @qrrr_e + +ADD_v 0.00 1110 ..1 ..... 10000 1 ..... ..... @qrrr_e +SUB_v 0.10 1110 ..1 ..... 10000 1 ..... ..... @qrrr_e +CMGT_v 0.00 1110 ..1 ..... 00110 1 ..... ..... @qrrr_e +CMHI_v 0.10 1110 ..1 ..... 00110 1 ..... ..... @qrrr_e +CMGE_v 0.00 1110 ..1 ..... 00111 1 ..... ..... @qrrr_e +CMHS_v 0.10 1110 ..1 ..... 00111 1 ..... ..... @qrrr_e +CMTST_v 0.00 1110 ..1 ..... 10001 1 ..... ..... @qrrr_e +CMEQ_v 0.10 1110 ..1 ..... 10001 1 ..... ..... @qrrr_e +SHADD_v 0.00 1110 ..1 ..... 
00000 1 ..... ..... @qrrr_e +UHADD_v 0.10 1110 ..1 ..... 00000 1 ..... ..... @qrrr_e +SHSUB_v 0.00 1110 ..1 ..... 00100 1 ..... ..... @qrrr_e +UHSUB_v 0.10 1110 ..1 ..... 00100 1 ..... ..... @qrrr_e +SRHADD_v 0.00 1110 ..1 ..... 00010 1 ..... ..... @qrrr_e +URHADD_v 0.10 1110 ..1 ..... 00010 1 ..... ..... @qrrr_e +SMAX_v 0.00 1110 ..1 ..... 01100 1 ..... ..... @qrrr_e +UMAX_v 0.10 1110 ..1 ..... 01100 1 ..... ..... @qrrr_e +SMIN_v 0.00 1110 ..1 ..... 01101 1 ..... ..... @qrrr_e +UMIN_v 0.10 1110 ..1 ..... 01101 1 ..... ..... @qrrr_e +SABD_v 0.00 1110 ..1 ..... 01110 1 ..... ..... @qrrr_e +UABD_v 0.10 1110 ..1 ..... 01110 1 ..... ..... @qrrr_e +SABA_v 0.00 1110 ..1 ..... 01111 1 ..... ..... @qrrr_e +UABA_v 0.10 1110 ..1 ..... 01111 1 ..... ..... @qrrr_e +MUL_v 0.00 1110 ..1 ..... 10011 1 ..... ..... @qrrr_e +PMUL_v 0.10 1110 001 ..... 10011 1 ..... ..... @qrrr_b +MLA_v 0.00 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e +MLS_v 0.10 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e + +SQDMULH_v 0.00 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e +SQRDMULH_v 0.10 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e + +### Advanced SIMD scalar x indexed element + +FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h +FMUL_si 0101 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s +FMUL_si 0101 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d + +FMLA_si 0101 1111 00 .. .... 0001 . 0 ..... ..... @rrx_h +FMLA_si 0101 1111 10 .. .... 0001 . 0 ..... ..... @rrx_s +FMLA_si 0101 1111 11 0. .... 0001 . 0 ..... ..... @rrx_d + +FMLS_si 0101 1111 00 .. .... 0101 . 0 ..... ..... @rrx_h +FMLS_si 0101 1111 10 .. .... 0101 . 0 ..... ..... @rrx_s +FMLS_si 0101 1111 11 0. .... 0101 . 0 ..... ..... @rrx_d + +FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h +FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s +FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d + +SQDMULH_si 0101 1111 01 .. .... 1100 . 0 ..... ..... @rrx_h +SQDMULH_si 0101 1111 10 .. .... 1100 . 0 ..... ..... @rrx_s + +SQRDMULH_si 0101 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h +SQRDMULH_si 0101 1111 10 . ..... 1101 . 0 ..... ..... @rrx_s + +### Advanced SIMD vector x indexed element + +FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h +FMUL_vi 0.00 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s +FMUL_vi 0.00 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d + +FMLA_vi 0.00 1111 00 .. .... 0001 . 0 ..... ..... @qrrx_h +FMLA_vi 0.00 1111 10 . ..... 0001 . 0 ..... ..... @qrrx_s +FMLA_vi 0.00 1111 11 0 ..... 0001 . 0 ..... ..... @qrrx_d + +FMLS_vi 0.00 1111 00 .. .... 0101 . 0 ..... ..... @qrrx_h +FMLS_vi 0.00 1111 10 . ..... 0101 . 0 ..... ..... @qrrx_s +FMLS_vi 0.00 1111 11 0 ..... 0101 . 0 ..... ..... @qrrx_d + +FMULX_vi 0.10 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h +FMULX_vi 0.10 1111 10 . ..... 1001 . 0 ..... ..... @qrrx_s +FMULX_vi 0.10 1111 11 0 ..... 1001 . 0 ..... ..... @qrrx_d + +FMLAL_vi 0.00 1111 10 .. .... 0000 . 0 ..... ..... @qrrx_h +FMLSL_vi 0.00 1111 10 .. .... 0100 . 0 ..... ..... @qrrx_h +FMLAL2_vi 0.10 1111 10 .. .... 1000 . 0 ..... ..... @qrrx_h +FMLSL2_vi 0.10 1111 10 .. .... 1100 . 0 ..... ..... @qrrx_h + +MUL_vi 0.00 1111 01 .. .... 1000 . 0 ..... ..... @qrrx_h +MUL_vi 0.00 1111 10 . ..... 1000 . 0 ..... ..... @qrrx_s + +MLA_vi 0.10 1111 01 .. .... 0000 . 0 ..... ..... @qrrx_h +MLA_vi 0.10 1111 10 . ..... 0000 . 0 ..... ..... @qrrx_s + +MLS_vi 0.10 1111 01 .. .... 0100 . 0 ..... ..... @qrrx_h +MLS_vi 0.10 1111 10 . ..... 0100 . 0 ..... ..... @qrrx_s + +SQDMULH_vi 0.00 1111 01 .. .... 1100 . 0 ..... ..... 
@qrrx_h +SQDMULH_vi 0.00 1111 10 . ..... 1100 . 0 ..... ..... @qrrx_s + +SQRDMULH_vi 0.00 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h +SQRDMULH_vi 0.00 1111 10 . ..... 1101 . 0 ..... ..... @qrrx_s + +# Floating-point conditional select + +FCSEL 0001 1110 .. 1 rm:5 cond:4 11 rn:5 rd:5 esz=%esz_hsd + +# Floating-point data-processing (3 source) + +@rrrr_hsd .... .... .. . rm:5 . ra:5 rn:5 rd:5 &rrrr_e esz=%esz_hsd + +FMADD 0001 1111 .. 0 ..... 0 ..... ..... ..... @rrrr_hsd +FMSUB 0001 1111 .. 0 ..... 1 ..... ..... ..... @rrrr_hsd +FNMADD 0001 1111 .. 1 ..... 0 ..... ..... ..... @rrrr_hsd +FNMSUB 0001 1111 .. 1 ..... 1 ..... ..... ..... @rrrr_hsd diff --git a/target/arm/tcg/cpu32.c b/target/arm/tcg/cpu32.c index de8f2be941..bdd82d912a 100644 --- a/target/arm/tcg/cpu32.c +++ b/target/arm/tcg/cpu32.c @@ -67,7 +67,7 @@ void aa32_max_features(ARMCPU *cpu) cpu->isar.id_mmfr4 = t; t = cpu->isar.id_mmfr5; - t = FIELD_DP32(t, ID_MMFR5, ETS, 1); /* FEAT_ETS */ + t = FIELD_DP32(t, ID_MMFR5, ETS, 2); /* FEAT_ETS2 */ cpu->isar.id_mmfr5 = t; t = cpu->isar.id_pfr0; @@ -457,6 +457,7 @@ static void cortex_a7_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -505,6 +506,7 @@ static void cortex_a15_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_THUMB2EE); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_DUMMY_C15_REGS); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -696,6 +698,7 @@ static void cortex_r52_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_PMSA); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_AUXCR); cpu->midr = 0x411fd133; /* r1p3 */ @@ -924,6 +927,7 @@ static void arm_max_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 62c4663512..0899251eef 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -63,6 +63,7 @@ static void aarch64_a35_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -231,6 +232,7 @@ static void aarch64_a55_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -299,6 +301,7 
@@ static void aarch64_a72_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -354,6 +357,7 @@ static void aarch64_a76_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -423,6 +427,7 @@ static void aarch64_a64fx_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_EL2); set_feature(&cpu->env, ARM_FEATURE_EL3); @@ -592,6 +597,7 @@ static void aarch64_neoverse_n1_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -663,6 +669,7 @@ static void aarch64_neoverse_v1_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -885,6 +892,7 @@ static void aarch64_a710_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -982,6 +990,7 @@ static void aarch64_neoverse_n2_initfn(Object *obj) set_feature(&cpu->env, ARM_FEATURE_V8); set_feature(&cpu->env, ARM_FEATURE_NEON); set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER); + set_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); set_feature(&cpu->env, ARM_FEATURE_AARCH64); set_feature(&cpu->env, ARM_FEATURE_CBAR_RO); set_feature(&cpu->env, ARM_FEATURE_EL2); @@ -1078,6 +1087,15 @@ void aarch64_max_tcg_initfn(Object *obj) uint32_t u; /* + * Unset ARM_FEATURE_BACKCOMPAT_CNTFRQ, which we would otherwise default + * to because we started with aarch64_a57_initfn(). A 'max' CPU might + * be a v8.6-or-later one, in which case the cntfrq must be 1GHz; and + * because it is our "may change" CPU type we are OK with it not being + * backwards-compatible with how it worked in old QEMU. + */ + unset_feature(&cpu->env, ARM_FEATURE_BACKCOMPAT_CNTFRQ); + + /* * Reset MIDR so the guest doesn't mistake our 'max' CPU type for a real * one and try to apply errata workarounds or use impdef features we * don't provide. 
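The ARM_FEATURE_BACKCOMPAT_CNTFRQ plumbing above boils down to toggling one bit in a per-CPU feature mask via set_feature()/unset_feature(). A minimal, self-contained sketch of that pattern follows; it assumes the usual bit-mask layout and uses illustrative names (toy_env, FEATURE_*) rather than the real CPUARMState and ARM_FEATURE_* definitions. Each named CPU model sets the backwards-compatible CNTFRQ bit, and the 'max' CPU clears it again so it can advertise the architected 1GHz frequency described in the comment above.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the ARM_FEATURE_* enum and env->features. */
enum { FEATURE_GENERIC_TIMER, FEATURE_BACKCOMPAT_CNTFRQ };

struct toy_env {
    uint64_t features;              /* one bit per FEATURE_* value */
};

static void set_feature(struct toy_env *env, int feature)
{
    env->features |= 1ULL << feature;
}

static void unset_feature(struct toy_env *env, int feature)
{
    env->features &= ~(1ULL << feature);
}

int main(void)
{
    struct toy_env env = { 0 };

    /* A named CPU model opts in to the old CNTFRQ behaviour... */
    set_feature(&env, FEATURE_GENERIC_TIMER);
    set_feature(&env, FEATURE_BACKCOMPAT_CNTFRQ);
    /* ...and 'max' clears the bit again so the 1GHz value can be used. */
    unset_feature(&env, FEATURE_BACKCOMPAT_CNTFRQ);

    printf("features = %#llx\n", (unsigned long long)env.features);
    return 0;
}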
@@ -1150,6 +1168,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = cpu->isar.id_aa64isar2; t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ + t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */ cpu->isar.id_aa64isar2 = t; t = cpu->isar.id_aa64pfr0; @@ -1159,7 +1178,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1); t = FIELD_DP64(t, ID_AA64PFR0, SEL2, 1); /* FEAT_SEL2 */ t = FIELD_DP64(t, ID_AA64PFR0, DIT, 1); /* FEAT_DIT */ - t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 2); /* FEAT_CSV2_2 */ + t = FIELD_DP64(t, ID_AA64PFR0, CSV2, 3); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR0, CSV3, 1); /* FEAT_CSV3 */ cpu->isar.id_aa64pfr0 = t; @@ -1174,7 +1193,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3); /* FEAT_MTE3 */ t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0); /* FEAT_RASv1p1 + FEAT_DoubleFault */ t = FIELD_DP64(t, ID_AA64PFR1, SME, 1); /* FEAT_SME */ - t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */ + t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_3 */ t = FIELD_DP64(t, ID_AA64PFR1, NMI, 1); /* FEAT_NMI */ cpu->isar.id_aa64pfr1 = t; @@ -1196,7 +1215,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, LO, 1); /* FEAT_LOR */ t = FIELD_DP64(t, ID_AA64MMFR1, PAN, 3); /* FEAT_PAN3 */ t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */ - t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 1); /* FEAT_ETS */ + t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ cpu->isar.id_aa64mmfr1 = t; @@ -1217,6 +1236,10 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR2, E0PD, 1); /* FEAT_E0PD */ cpu->isar.id_aa64mmfr2 = t; + t = cpu->isar.id_aa64mmfr3; + t = FIELD_DP64(t, ID_AA64MMFR3, SPEC_FPACC, 1); /* FEAT_FPACC_SPEC */ + cpu->isar.id_aa64mmfr3 = t; + t = cpu->isar.id_aa64zfr0; t = FIELD_DP64(t, ID_AA64ZFR0, SVEVER, 1); t = FIELD_DP64(t, ID_AA64ZFR0, AES, 2); /* FEAT_SVE_PMULL128 */ diff --git a/target/arm/tcg/gengvec.c b/target/arm/tcg/gengvec.c new file mode 100644 index 0000000000..56a1dc1f75 --- /dev/null +++ b/target/arm/tcg/gengvec.c @@ -0,0 +1,2315 @@ +/* + * ARM generic vector expansion + * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2005-2007 CodeSourcery + * Copyright (c) 2007 OpenedHand, Ltd. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "translate.h" + + +static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz, + gen_helper_gvec_3_ptr *fn) +{ + TCGv_ptr qc_ptr = tcg_temp_new_ptr(); + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, + opr_sz, max_sz, 0, fn); +} + +void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_neon_sqdmulh_h, gen_helper_neon_sqdmulh_s + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_neon_sqrdmulh_h, gen_helper_neon_sqrdmulh_s + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[2] = { + gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 + }; + tcg_debug_assert(vece >= 1 && vece <= 2); + gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); +} + +#define GEN_CMP0(NAME, COND) \ + void NAME(unsigned vece, uint32_t d, uint32_t m, \ + uint32_t opr_sz, uint32_t max_sz) \ + { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } + +GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) +GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) +GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) +GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) +GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) + +#undef GEN_CMP0 + +static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_sar16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_sari_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_sari_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_sari_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ssra8_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_ssra16_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_h, + .load_dest = true, 
+ .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ssra32_i32, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ssra64_i64, + .fniv = gen_ssra_vec, + .fno = gen_helper_gvec_ssra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. + */ + shift = MIN(shift, (8 << vece) - 1); + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} + +static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr8i_i64(a, a, shift); + tcg_gen_vec_add8_i64(d, d, a); +} + +static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_vec_shr16i_i64(a, a, shift); + tcg_gen_vec_add16_i64(d, d, a); +} + +static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_add_i32(d, d, a); +} + +static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_add_i64(d, d, a); +} + +static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + tcg_gen_shri_vec(vece, a, a, sh); + tcg_gen_add_vec(vece, d, d, a); +} + +void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_usra8_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8, }, + { .fni8 = gen_usra16_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16, }, + { .fni4 = gen_usra32_i32, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32, }, + { .fni8 = gen_usra64_i64, + .fniv = gen_usra_vec, + .fno = gen_helper_gvec_usra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64, }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in all zeros as input to accumulate: nop. + */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } else { + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} + +/* + * Shift one less than the requested amount, and the low bit is + * the rounding bit. For the 8 and 16-bit operations, because we + * mask the low bit, we can perform a normal integer shift instead + * of a vector shift. 
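+ * For example, a rounding right shift of the byte value 7 by 2 computes + * t = (7 >> 1) & 1 = 1 and d = (7 >> 2) + 1 = 2, which matches + * (7 + 2) >> 2, i.e. a shift with round-to-nearest.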
+ */ +static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sar8i_i64(d, a, sh); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sar16i_i64(d, a, sh); + tcg_gen_vec_add16_i64(d, d, t); +} + +void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t; + + /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ + if (sh == 32) { + tcg_gen_movi_i32(d, 0); + return; + } + t = tcg_temp_new_i32(); + tcg_gen_extract_i32(t, a, sh - 1, 1); + tcg_gen_sari_i32(d, a, sh); + tcg_gen_add_i32(d, d, t); +} + + void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_extract_i64(t, a, sh - 1, 1); + tcg_gen_sari_i64(d, a, sh); + tcg_gen_add_i64(d, d, t); +} + +static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec ones = tcg_temp_new_vec_matching(d); + + tcg_gen_shri_vec(vece, t, a, sh - 1); + tcg_gen_dupi_vec(vece, ones, 1); + tcg_gen_and_vec(vece, t, t, ones); + tcg_gen_sari_vec(vece, d, a, sh); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srshr8_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srshr16_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srshr32_i32, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_srshr64_i64, + .fniv = gen_srshr_vec, + .fno = gen_helper_gvec_srshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. 
+ */ + tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_srshr8_i64(t, a, sh); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_srshr16_i64(t, a, sh); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + gen_srshr32_i32(t, a, sh); + tcg_gen_add_i32(d, d, t); +} + +static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + gen_srshr64_i64(t, a, sh); + tcg_gen_add_i64(d, d, t); +} + +static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + gen_srshr_vec(vece, t, a, sh); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_srsra8_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_srsra16_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_srsra32_i32, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_srsra64_i64, + .fniv = gen_srsra_vec, + .fno = gen_helper_gvec_srsra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* + * Shifts larger than the element size are architecturally valid. + * Signed results in all sign bits. With rounding, this produces + * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. + * I.e. always zero. With accumulation, this leaves D unchanged. + */ + if (shift == (8 << vece)) { + /* Nop, but we do need to clear the tail. 
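+ * A move of rd onto itself leaves the active bytes untouched while + * still zeroing any bytes between opr_sz and max_sz, which every + * gvec expansion is expected to do.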
*/ + tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_shr8i_i64(d, a, sh); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, sh - 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_shr16i_i64(d, a, sh); + tcg_gen_vec_add16_i64(d, d, t); +} + +void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t; + + /* Handle shift by the input size for the benefit of trans_URSHR_ri */ + if (sh == 32) { + tcg_gen_extract_i32(d, a, sh - 1, 1); + return; + } + t = tcg_temp_new_i32(); + tcg_gen_extract_i32(t, a, sh - 1, 1); + tcg_gen_shri_i32(d, a, sh); + tcg_gen_add_i32(d, d, t); +} + +void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_extract_i64(t, a, sh - 1, 1); + tcg_gen_shri_i64(d, a, sh); + tcg_gen_add_i64(d, d, t); +} + +static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec ones = tcg_temp_new_vec_matching(d); + + tcg_gen_shri_vec(vece, t, a, shift - 1); + tcg_gen_dupi_vec(vece, ones, 1); + tcg_gen_and_vec(vece, t, t, ones); + tcg_gen_shri_vec(vece, d, a, shift); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_urshr8_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urshr16_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urshr32_i32, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_urshr64_i64, + .fniv = gen_urshr_vec, + .fno = gen_helper_gvec_urshr_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + if (shift == (8 << vece)) { + /* + * Shifts larger than the element size are architecturally valid. + * Unsigned results in zero. With rounding, this produces a + * copy of the most significant bit. 
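+ * For example, URSHR #8 on the byte 0x80 is (0x80 + 0x80) >> 8 = 1 + * and on 0x7f is (0x7f + 0x80) >> 8 = 0, so a plain right shift by + * esize - 1 yields exactly the rounded result.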
+ */ + tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + if (sh == 8) { + tcg_gen_vec_shr8i_i64(t, a, 7); + } else { + gen_urshr8_i64(t, a, sh); + } + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + if (sh == 16) { + tcg_gen_vec_shr16i_i64(t, a, 15); + } else { + gen_urshr16_i64(t, a, sh); + } + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + if (sh == 32) { + tcg_gen_shri_i32(t, a, 31); + } else { + gen_urshr32_i32(t, a, sh); + } + tcg_gen_add_i32(d, d, t); +} + +static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + if (sh == 64) { + tcg_gen_shri_i64(t, a, 63); + } else { + gen_urshr64_i64(t, a, sh); + } + tcg_gen_add_i64(d, d, t); +} + +static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + if (sh == (8 << vece)) { + tcg_gen_shri_vec(vece, t, a, sh - 1); + } else { + gen_urshr_vec(vece, t, a, sh); + } + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen2i ops[4] = { + { .fni8 = gen_ursra8_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fni8 = gen_ursra16_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_ursra32_i32, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_ursra64_i64, + .fniv = gen_ursra_vec, + .fno = gen_helper_gvec_ursra_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize] */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); +} + +static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff >> shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shri_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_shri_i32(a, a, shift); + tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); +} + +static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_shri_i64(a, a, shift); + tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); +} + +static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = 
tcg_temp_new_vec_matching(d); + + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); + tcg_gen_shri_vec(vece, t, a, sh); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); +} + +void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shr8_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shr16_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shr32_ins_i32, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shr64_ins_i64, + .fniv = gen_shr_ins_vec, + .fno = gen_helper_gvec_sri_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [1..esize]. */ + tcg_debug_assert(shift > 0); + tcg_debug_assert(shift <= (8 << vece)); + + /* Shift of esize leaves destination unchanged. */ + if (shift < (8 << vece)) { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } else { + /* Nop, but we do need to clear the tail. */ + tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); + } +} + +static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_8, 0xff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + uint64_t mask = dup_const(MO_16, 0xffff << shift); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_shli_i64(t, a, shift); + tcg_gen_andi_i64(t, t, mask); + tcg_gen_andi_i64(d, d, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) +{ + tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); +} + +static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) +{ + tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); +} + +static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_temp_new_vec_matching(d); + + tcg_gen_shli_vec(vece, t, a, sh); + tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); + tcg_gen_and_vec(vece, d, d, m); + tcg_gen_or_vec(vece, d, d, t); +} + +void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; + const GVecGen2i ops[4] = { + { .fni8 = gen_shl8_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_b, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shl16_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_h, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shl32_ins_i32, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_s, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_shl64_ins_i64, + .fniv = gen_shl_ins_vec, + .fno = gen_helper_gvec_sli_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + 
.vece = MO_64 }, + }; + + /* tszimm encoding produces immediates in the range [0..esize-1]. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift < (8 << vece)); + + if (shift == 0) { + tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); + } +} + +static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_add_u8(d, d, a); +} + +static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u8(a, a, b); + gen_helper_neon_sub_u8(d, d, a); +} + +static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_add_u16(d, d, a); +} + +static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + gen_helper_neon_mul_u16(a, a, b); + gen_helper_neon_sub_u16(d, d, a); +} + +static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_add_i32(d, d, a); +} + +static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_mul_i32(a, a, b); + tcg_gen_sub_i32(d, d, a); +} + +static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_add_i64(d, d, a); +} + +static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_mul_i64(a, a, b); + tcg_gen_sub_i64(d, d, a); +} + +static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_add_vec(vece, d, d, a); +} + +static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_mul_vec(vece, a, a, b); + tcg_gen_sub_vec(vece, d, d, a); +} + +/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, + * these tables are shared with AArch64 which does support them. + */ +void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mla8_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mla16_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mla32_i32, + .fniv = gen_mla_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mla64_i64, + .fniv = gen_mla_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_mul_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_mls8_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_mls16_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_mls32_i32, + .fniv = gen_mls_vec, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_mls64_i64, + .fniv = gen_mls_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .load_dest = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +/* CMTST : test is "if (X & Y != 0)". 
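+ * The per-element result is a mask: all ones where X & Y is non-zero, + * all zeros otherwise, which is what the TSTNE setcond/compare below + * produce.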
*/ +static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_negsetcond_i32(TCG_COND_TSTNE, d, a, b); +} + +void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_negsetcond_i64(TCG_COND_TSTNE, d, a, b); +} + +static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + tcg_gen_cmp_vec(TCG_COND_TSTNE, vece, d, a, b); +} + +void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; + static const GVecGen3 ops[4] = { + { .fni4 = gen_helper_neon_tst_u8, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni4 = gen_helper_neon_tst_u16, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_cmtst_i32, + .fniv = gen_cmtst_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_cmtst_i64, + .fniv = gen_cmtst_vec, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(); + TCGv_i32 rval = tcg_temp_new_i32(); + TCGv_i32 lsh = tcg_temp_new_i32(); + TCGv_i32 rsh = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 max = tcg_constant_i32(32); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(lsh, shift); + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_shl_i32(lval, src, lsh); + tcg_gen_shr_i32(rval, src, rsh); + tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); +} + +void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(); + TCGv_i64 rval = tcg_temp_new_i64(); + TCGv_i64 lsh = tcg_temp_new_i64(); + TCGv_i64 rsh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 max = tcg_constant_i64(64); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i64(lsh, shift); + tcg_gen_neg_i64(rsh, lsh); + tcg_gen_shl_i64(lval, src, lsh); + tcg_gen_shr_i64(rval, src, rsh); + tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); + tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); +} + +static void gen_ushl_vec(unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(dst); + TCGv_vec rval = tcg_temp_new_vec_matching(dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(dst); + TCGv_vec msk, max; + + tcg_gen_neg_vec(vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(lsh, shift); + } else { + msk = tcg_temp_new_vec_matching(dst); + tcg_gen_dupi_vec(vece, msk, 0xff); + tcg_gen_and_vec(vece, lsh, shift, msk); + tcg_gen_and_vec(vece, rsh, rsh, msk); + } + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. 
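+ * For USHL a shift whose magnitude is greater than or equal to the + * element size must yield zero, so lanes with an out-of-range lsh or + * rsh are masked out of the result by the comparisons below.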
+ */ + tcg_gen_shlv_vec(vece, lval, src, lsh); + tcg_gen_shrv_vec(vece, rval, src, rsh); + + max = tcg_temp_new_vec_matching(dst); + tcg_gen_dupi_vec(vece, max, 8 << vece); + + /* + * The choice of LT (signed) and GEU (unsigned) are biased toward + * the instructions of the x86_64 host. For MO_8, the whole byte + * is significant so we must use an unsigned compare; otherwise we + * have already masked to a byte and so a signed compare works. + * Other tcg hosts have a full set of comparisons and do not care. + */ + if (vece == MO_8) { + tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); + tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); + tcg_gen_andc_vec(vece, lval, lval, lsh); + tcg_gen_andc_vec(vece, rval, rval, rsh); + } else { + tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); + tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); + tcg_gen_and_vec(vece, lval, lval, lsh); + tcg_gen_and_vec(vece, rval, rval, rsh); + } + tcg_gen_or_vec(vece, dst, lval, rval); +} + +void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_shlv_vec, + INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_ushl_vec, + .fno = gen_helper_gvec_ushl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_ushl_i32, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_ushl_i64, + .fniv = gen_ushl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) +{ + TCGv_i32 lval = tcg_temp_new_i32(); + TCGv_i32 rval = tcg_temp_new_i32(); + TCGv_i32 lsh = tcg_temp_new_i32(); + TCGv_i32 rsh = tcg_temp_new_i32(); + TCGv_i32 zero = tcg_constant_i32(0); + TCGv_i32 max = tcg_constant_i32(31); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_ext8s_i32(lsh, shift); + tcg_gen_neg_i32(rsh, lsh); + tcg_gen_shl_i32(lval, src, lsh); + tcg_gen_umin_i32(rsh, rsh, max); + tcg_gen_sar_i32(rval, src, rsh); + tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); +} + +void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) +{ + TCGv_i64 lval = tcg_temp_new_i64(); + TCGv_i64 rval = tcg_temp_new_i64(); + TCGv_i64 lsh = tcg_temp_new_i64(); + TCGv_i64 rsh = tcg_temp_new_i64(); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 max = tcg_constant_i64(63); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. 
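+ * Unlike USHL, an over-large right shift must still return the sign + * (0 or -1), which is why rsh is clamped to 63 with umin before the + * arithmetic shift instead of being discarded.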
+ */ + tcg_gen_ext8s_i64(lsh, shift); + tcg_gen_neg_i64(rsh, lsh); + tcg_gen_shl_i64(lval, src, lsh); + tcg_gen_umin_i64(rsh, rsh, max); + tcg_gen_sar_i64(rval, src, rsh); + tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); + tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); +} + +static void gen_sshl_vec(unsigned vece, TCGv_vec dst, + TCGv_vec src, TCGv_vec shift) +{ + TCGv_vec lval = tcg_temp_new_vec_matching(dst); + TCGv_vec rval = tcg_temp_new_vec_matching(dst); + TCGv_vec lsh = tcg_temp_new_vec_matching(dst); + TCGv_vec rsh = tcg_temp_new_vec_matching(dst); + TCGv_vec tmp = tcg_temp_new_vec_matching(dst); + + /* + * Rely on the TCG guarantee that out of range shifts produce + * unspecified results, not undefined behaviour (i.e. no trap). + * Discard out-of-range results after the fact. + */ + tcg_gen_neg_vec(vece, rsh, shift); + if (vece == MO_8) { + tcg_gen_mov_vec(lsh, shift); + } else { + tcg_gen_dupi_vec(vece, tmp, 0xff); + tcg_gen_and_vec(vece, lsh, shift, tmp); + tcg_gen_and_vec(vece, rsh, rsh, tmp); + } + + /* Bound rsh so out of bound right shift gets -1. */ + tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); + tcg_gen_umin_vec(vece, rsh, rsh, tmp); + tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); + + tcg_gen_shlv_vec(vece, lval, src, lsh); + tcg_gen_sarv_vec(vece, rval, src, rsh); + + /* Select in-bound left shift. */ + tcg_gen_andc_vec(vece, lval, lval, tmp); + + /* Select between left and right shift. */ + if (vece == MO_8) { + tcg_gen_dupi_vec(vece, tmp, 0); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); + } else { + tcg_gen_dupi_vec(vece, tmp, 0x80); + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); + } +} + +void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, + INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_sshl_vec, + .fno = gen_helper_gvec_sshl_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sshl_i32, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sshl_i64, + .fniv = gen_sshl_vec, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_srshl_b, gen_helper_gvec_srshl_h, + gen_helper_gvec_srshl_s, gen_helper_gvec_srshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[] = { + gen_helper_gvec_urshl_b, gen_helper_gvec_urshl_h, + gen_helper_gvec_urshl_s, gen_helper_gvec_urshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_sqshl_b, gen_helper_neon_sqshl_h, + 
gen_helper_neon_sqshl_s, gen_helper_neon_sqshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_uqshl_b, gen_helper_neon_uqshl_h, + gen_helper_neon_uqshl_s, gen_helper_neon_uqshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_sqrshl_b, gen_helper_neon_sqrshl_h, + gen_helper_neon_sqrshl_s, gen_helper_neon_sqrshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3_ptr * const fns[] = { + gen_helper_neon_uqrshl_b, gen_helper_neon_uqrshl_h, + gen_helper_neon_uqrshl_s, gen_helper_neon_uqrshl_d, + }; + tcg_debug_assert(vece <= MO_64); + tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, tcg_env, + opr_sz, max_sz, 0, fns[vece]); +} + +void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + uint64_t max = MAKE_64BIT_MASK(0, 8 << esz); + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_add_i64(tmp, a, b); + tcg_gen_umin_i64(res, tmp, tcg_constant_i64(max)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_uqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_add_i64(t, a, b); + tcg_gen_movcond_i64(TCG_COND_LTU, res, t, a, + tcg_constant_i64(UINT64_MAX), t); + tcg_gen_xor_i64(t, t, res); + tcg_gen_or_i64(qc, qc, t); +} + +static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_usadd_vec(vece, t, a, b); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); +} + +void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_usadd_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_b, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_h, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fniv = gen_uqadd_vec, + .fno = gen_helper_gvec_uqadd_s, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fniv = gen_uqadd_vec, + .fni8 = gen_uqadd_d, + .fno = gen_helper_gvec_uqadd_d, + .write_aofs = true, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); + int64_t min = -1ll - max; + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_add_i64(tmp, a, b); + tcg_gen_smin_i64(res, tmp, 
tcg_constant_i64(max)); + tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_sqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_add_i64(t0, a, b); + + /* Compute signed overflow indication into T1 */ + tcg_gen_xor_i64(t1, a, b); + tcg_gen_xor_i64(t2, t0, a); + tcg_gen_andc_i64(t1, t2, t1); + + /* Compute saturated value into T2 */ + tcg_gen_sari_i64(t2, a, 63); + tcg_gen_xori_i64(t2, t2, INT64_MAX); + + tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); + tcg_gen_xor_i64(t0, t0, res); + tcg_gen_or_i64(qc, qc, t0); +} + +static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_add_vec(vece, x, a, b); + tcg_gen_ssadd_vec(vece, t, a, b); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); +} + +void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ssadd_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqadd_vec, + .fno = gen_helper_gvec_sqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqadd_vec, + .fni8 = gen_sqadd_d, + .fno = gen_helper_gvec_sqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tmp, a, b); + tcg_gen_smax_i64(res, tmp, tcg_constant_i64(0)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_uqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t, a, b); + tcg_gen_movcond_i64(TCG_COND_LTU, res, a, b, tcg_constant_i64(0), t); + tcg_gen_xor_i64(t, t, res); + tcg_gen_or_i64(qc, qc, t); +} + +static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_ussub_vec(vece, t, a, b); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); +} + +void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_ussub_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_uqsub_vec, + .fno = gen_helper_gvec_uqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_uqsub_vec, + .fni8 = gen_uqsub_d, + .fno = gen_helper_gvec_uqsub_d, 
+ .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + int64_t max = MAKE_64BIT_MASK(0, (8 << esz) - 1); + int64_t min = -1ll - max; + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_sub_i64(tmp, a, b); + tcg_gen_smin_i64(res, tmp, tcg_constant_i64(max)); + tcg_gen_smax_i64(res, res, tcg_constant_i64(min)); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_sqsub_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + TCGv_i64 t2 = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t0, a, b); + + /* Compute signed overflow indication into T1 */ + tcg_gen_xor_i64(t1, a, b); + tcg_gen_xor_i64(t2, t0, a); + tcg_gen_and_i64(t1, t1, t2); + + /* Compute saturated value into T2 */ + tcg_gen_sari_i64(t2, a, 63); + tcg_gen_xori_i64(t2, t2, INT64_MAX); + + tcg_gen_movcond_i64(TCG_COND_LT, res, t1, tcg_constant_i64(0), t2, t0); + tcg_gen_xor_i64(t0, t0, res); + tcg_gen_or_i64(qc, qc, t0); +} + +static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec x = tcg_temp_new_vec_matching(t); + tcg_gen_sub_vec(vece, x, a, b); + tcg_gen_sssub_vec(vece, t, a, b); + tcg_gen_xor_vec(vece, x, x, t); + tcg_gen_or_vec(vece, qc, qc, x); +} + +void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sssub_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_sqsub_vec, + .fno = gen_helper_gvec_sqsub_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_sqsub_vec, + .fni8 = gen_sqsub_d, + .fno = gen_helper_gvec_sqsub_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_sub_i32(t, a, b); + tcg_gen_sub_i32(d, b, a); + tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); +} + +static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t, a, b); + tcg_gen_sub_i64(d, b, a); + tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); +} + +static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_smin_vec(vece, t, a, b); + tcg_gen_smax_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { 
.fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_sabd_i32, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_sabd_i64, + .fniv = gen_sabd_vec, + .fno = gen_helper_gvec_sabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_sub_i32(t, a, b); + tcg_gen_sub_i32(d, b, a); + tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); +} + +static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_sub_i64(t, a, b); + tcg_gen_sub_i64(d, b, a); + tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); +} + +static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_umin_vec(vece, t, a, b); + tcg_gen_umax_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_b, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_h, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uabd_i32, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_s, + .opt_opc = vecop_list, + .vece = MO_32 }, + { .fni8 = gen_uabd_i64, + .fniv = gen_uabd_vec, + .fno = gen_helper_gvec_uabd_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + gen_sabd_i32(t, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + gen_sabd_i64(t, a, b); + tcg_gen_add_i64(d, d, t); +} + +static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + gen_sabd_vec(vece, t, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_smin_vec, INDEX_op_smax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_saba_i32, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_saba_i64, + .fniv = gen_saba_vec, + .fno = gen_helper_gvec_saba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); 
+ gen_uabd_i32(t, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + gen_uabd_i64(t, a, b); + tcg_gen_add_i64(d, d, t); +} + +static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + gen_uabd_vec(vece, t, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sub_vec, INDEX_op_add_vec, + INDEX_op_umin_vec, INDEX_op_umax_vec, 0 + }; + static const GVecGen3 ops[4] = { + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_b, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_8 }, + { .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_h, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_16 }, + { .fni4 = gen_uaba_i32, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_s, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_32 }, + { .fni8 = gen_uaba_i64, + .fniv = gen_uaba_vec, + .fno = gen_helper_gvec_uaba_d, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + .opt_opc = vecop_list, + .load_dest = true, + .vece = MO_64 }, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_addp_b, + gen_helper_gvec_addp_h, + gen_helper_gvec_addp_s, + gen_helper_gvec_addp_d, + }; + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_smaxp_b, + gen_helper_gvec_smaxp_h, + gen_helper_gvec_smaxp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_sminp_b, + gen_helper_gvec_sminp_h, + gen_helper_gvec_sminp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_umaxp_b, + gen_helper_gvec_umaxp_h, + gen_helper_gvec_umaxp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static gen_helper_gvec_3 * const fns[4] = { + gen_helper_gvec_uminp_b, + gen_helper_gvec_uminp_h, + gen_helper_gvec_uminp_s, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, 0, fns[vece]); +} + +static void gen_shadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_sar8i_i64(a, a, 1); + tcg_gen_vec_sar8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void 
gen_shadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_sar16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_shadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_and_i32(t, a, b); + tcg_gen_sari_i32(a, a, 1); + tcg_gen_sari_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_shadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_and_vec(vece, t, a, b); + tcg_gen_sari_vec(vece, a, a, 1); + tcg_gen_sari_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_shadd8_i64, + .fniv = gen_shadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shadd16_i64, + .fniv = gen_shadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shadd_i32, + .fniv = gen_shadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_uhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_shr8i_i64(a, a, 1); + tcg_gen_vec_shr8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_uhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_and_i64(t, a, b); + tcg_gen_vec_shr16i_i64(a, a, 1); + tcg_gen_vec_shr16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_uhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_and_i32(t, a, b); + tcg_gen_shri_i32(a, a, 1); + tcg_gen_shri_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_uhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_and_vec(vece, t, a, b); + tcg_gen_shri_vec(vece, a, a, 1); + tcg_gen_shri_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_uhadd8_i64, + .fniv = gen_uhadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_uhadd16_i64, + .fniv = gen_uhadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uhadd_i32, + .fniv = gen_uhadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + tcg_debug_assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} 
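As an aside, the SHADD/UHADD expansions above avoid widening each element by using the identity (a + b) >> 1 == (a >> 1) + (b >> 1) + (a & b & 1); the AND with dup_const(MO_8, 1) (or MO_16) extracts just the per-lane carry bit, so lanes packed into a single 64-bit value do not interfere, and the signed variants use the same identity with arithmetic shifts. The rounding halving adds further down swap the carry term for (a | b) & 1, and the halving subtracts use (~a & b) & 1. A minimal host-side sketch of the unsigned byte case, separate from the patch itself, might look like this (the function name and the exhaustive check are illustrative only):

    /* Illustrative sketch, not part of the patch: scalar model of one UHADD lane. */
    #include <assert.h>
    #include <stdint.h>

    /* (a + b) >> 1 for unsigned bytes without forming the 9-bit intermediate sum. */
    static uint8_t uhadd8_model(uint8_t a, uint8_t b)
    {
        return (uint8_t)((a >> 1) + (b >> 1) + (a & b & 1));
    }

    int main(void)
    {
        /* Exhaustively confirm the identity over all byte pairs. */
        for (unsigned a = 0; a < 256; a++) {
            for (unsigned b = 0; b < 256; b++) {
                assert(uhadd8_model(a, b) == ((a + b) >> 1));
            }
        }
        return 0;
    }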
+ +static void gen_shsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_sar8i_i64(a, a, 1); + tcg_gen_vec_sar8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sub8_i64(d, a, b); + tcg_gen_vec_sub8_i64(d, d, t); +} + +static void gen_shsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_sar16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sub16_i64(d, a, b); + tcg_gen_vec_sub16_i64(d, d, t); +} + +static void gen_shsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andc_i32(t, b, a); + tcg_gen_sari_i32(a, a, 1); + tcg_gen_sari_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_sub_i32(d, a, b); + tcg_gen_sub_i32(d, d, t); +} + +static void gen_shsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_andc_vec(vece, t, b, a); + tcg_gen_sari_vec(vece, a, a, 1); + tcg_gen_sari_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_sub_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 g[4] = { + { .fni8 = gen_shsub8_i64, + .fniv = gen_shsub_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_shsub16_i64, + .fniv = gen_shsub_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_shsub_i32, + .fniv = gen_shsub_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_uhsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_shr8i_i64(a, a, 1); + tcg_gen_vec_shr8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_sub8_i64(d, a, b); + tcg_gen_vec_sub8_i64(d, d, t); +} + +static void gen_uhsub16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andc_i64(t, b, a); + tcg_gen_vec_shr16i_i64(a, a, 1); + tcg_gen_vec_shr16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_sub16_i64(d, a, b); + tcg_gen_vec_sub16_i64(d, d, t); +} + +static void gen_uhsub_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_andc_i32(t, b, a); + tcg_gen_shri_i32(a, a, 1); + tcg_gen_shri_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_sub_i32(d, a, b); + tcg_gen_sub_i32(d, d, t); +} + +static void gen_uhsub_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_andc_vec(vece, t, b, a); + tcg_gen_shri_vec(vece, a, a, 1); + tcg_gen_shri_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_sub_vec(vece, d, a, b); + tcg_gen_sub_vec(vece, d, d, t); +} + +void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_sub_vec, 0 + }; + static const GVecGen3 g[4] = { + { .fni8 = gen_uhsub8_i64, + .fniv = gen_uhsub_vec, + .opt_opc = 
vecop_list, + .vece = MO_8 }, + { .fni8 = gen_uhsub16_i64, + .fniv = gen_uhsub_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_uhsub_i32, + .fniv = gen_uhsub_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_srhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_sar8i_i64(a, a, 1); + tcg_gen_vec_sar8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_srhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_sar16i_i64(a, a, 1); + tcg_gen_vec_sar16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_srhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_or_i32(t, a, b); + tcg_gen_sari_i32(a, a, 1); + tcg_gen_sari_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_srhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_or_vec(vece, t, a, b); + tcg_gen_sari_vec(vece, a, a, 1); + tcg_gen_sari_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, t); +} + +void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_sari_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_srhadd8_i64, + .fniv = gen_srhadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_srhadd16_i64, + .fniv = gen_srhadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_srhadd_i32, + .fniv = gen_srhadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} + +static void gen_urhadd8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_shr8i_i64(a, a, 1); + tcg_gen_vec_shr8i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); + tcg_gen_vec_add8_i64(d, a, b); + tcg_gen_vec_add8_i64(d, d, t); +} + +static void gen_urhadd16_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_or_i64(t, a, b); + tcg_gen_vec_shr16i_i64(a, a, 1); + tcg_gen_vec_shr16i_i64(b, b, 1); + tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); + tcg_gen_vec_add16_i64(d, a, b); + tcg_gen_vec_add16_i64(d, d, t); +} + +static void gen_urhadd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_or_i32(t, a, b); + tcg_gen_shri_i32(a, a, 1); + tcg_gen_shri_i32(b, b, 1); + tcg_gen_andi_i32(t, t, 1); + tcg_gen_add_i32(d, a, b); + tcg_gen_add_i32(d, d, t); +} + +static void gen_urhadd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + + tcg_gen_or_vec(vece, t, a, b); + tcg_gen_shri_vec(vece, a, a, 1); + tcg_gen_shri_vec(vece, b, b, 1); + tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(d, vece, 1)); + tcg_gen_add_vec(vece, d, a, b); + tcg_gen_add_vec(vece, d, d, 
t); +} + +void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_shri_vec, INDEX_op_add_vec, 0 + }; + static const GVecGen3 g[] = { + { .fni8 = gen_urhadd8_i64, + .fniv = gen_urhadd_vec, + .opt_opc = vecop_list, + .vece = MO_8 }, + { .fni8 = gen_urhadd16_i64, + .fniv = gen_urhadd_vec, + .opt_opc = vecop_list, + .vece = MO_16 }, + { .fni4 = gen_urhadd_i32, + .fniv = gen_urhadd_vec, + .opt_opc = vecop_list, + .vece = MO_32 }, + }; + assert(vece <= MO_32); + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &g[vece]); +} diff --git a/target/arm/tcg/gengvec64.c b/target/arm/tcg/gengvec64.c new file mode 100644 index 0000000000..2617cde0a5 --- /dev/null +++ b/target/arm/tcg/gengvec64.c @@ -0,0 +1,371 @@ +/* + * AArch64 generic vector expansion + * + * Copyright (c) 2013 Alexander Graf <agraf@suse.de> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "translate.h" +#include "translate-a64.h" + + +static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) +{ + tcg_gen_rotli_i64(d, m, 1); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) +{ + tcg_gen_rotli_vec(vece, d, m, 1); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3 op = { + .fni8 = gen_rax1_i64, + .fniv = gen_rax1_vec, + .opt_opc = vecop_list, + .fno = gen_helper_crypto_rax1, + .vece = MO_64, + }; + tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); +} + +static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + uint64_t mask = dup_const(MO_8, 0xff >> sh); + + tcg_gen_xor_i64(t, n, m); + tcg_gen_shri_i64(d, t, sh); + tcg_gen_shli_i64(t, t, 8 - sh); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(t, t, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + TCGv_i64 t = tcg_temp_new_i64(); + uint64_t mask = dup_const(MO_16, 0xffff >> sh); + + tcg_gen_xor_i64(t, n, m); + tcg_gen_shri_i64(d, t, sh); + tcg_gen_shli_i64(t, t, 16 - sh); + tcg_gen_andi_i64(d, d, mask); + tcg_gen_andi_i64(t, t, ~mask); + tcg_gen_or_i64(d, d, t); +} + +static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) +{ + tcg_gen_xor_i32(d, n, m); + tcg_gen_rotri_i32(d, d, sh); +} + +static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) +{ + tcg_gen_xor_i64(d, n, m); + tcg_gen_rotri_i64(d, d, sh); +} + +static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, int64_t sh) +{ + tcg_gen_xor_vec(vece, d, n, m); + tcg_gen_rotri_vec(vece, d, d, 
sh); +} + +void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, int64_t shift, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; + static const GVecGen3i ops[4] = { + { .fni8 = gen_xar8_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_b, + .opt_opc = vecop, + .vece = MO_8 }, + { .fni8 = gen_xar16_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_h, + .opt_opc = vecop, + .vece = MO_16 }, + { .fni4 = gen_xar_i32, + .fniv = gen_xar_vec, + .fno = gen_helper_sve2_xar_s, + .opt_opc = vecop, + .vece = MO_32 }, + { .fni8 = gen_xar_i64, + .fniv = gen_xar_vec, + .fno = gen_helper_gvec_xar_d, + .opt_opc = vecop, + .vece = MO_64 } + }; + int esize = 8 << vece; + + /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ + tcg_debug_assert(shift >= 0); + tcg_debug_assert(shift <= esize); + shift &= esize - 1; + + if (shift == 0) { + /* xar with no rotate devolves to xor. */ + tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); + } else { + tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, + shift, &ops[vece]); + } +} + +static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) +{ + tcg_gen_xor_i64(d, n, m); + tcg_gen_xor_i64(d, d, k); +} + +static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec k) +{ + tcg_gen_xor_vec(vece, d, n, m); + tcg_gen_xor_vec(vece, d, d, k); +} + +void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 op = { + .fni8 = gen_eor3_i64, + .fniv = gen_eor3_vec, + .fno = gen_helper_sve2_eor3, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); +} + +static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) +{ + tcg_gen_andc_i64(d, m, k); + tcg_gen_xor_i64(d, d, n); +} + +static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, + TCGv_vec m, TCGv_vec k) +{ + tcg_gen_andc_vec(vece, d, m, k); + tcg_gen_xor_vec(vece, d, d, n); +} + +void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz) +{ + static const GVecGen4 op = { + .fni8 = gen_bcax_i64, + .fniv = gen_bcax_vec, + .fno = gen_helper_sve2_bcax, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64, + }; + tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); +} + +/* + * Set @res to the correctly saturated result. + * Set @qc non-zero if saturation occurred. + */ +void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + TCGv_i64 max = tcg_constant_i64((1ull << ((8 << esz) - 1)) - 1); + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_add_i64(t, a, b); + tcg_gen_smin_i64(res, t, max); + tcg_gen_xor_i64(t, t, res); + tcg_gen_or_i64(qc, qc, t); +} + +void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 max = tcg_constant_i64(INT64_MAX); + TCGv_i64 t = tcg_temp_new_i64(); + + /* Maximum value that can be added to @a without overflow. */ + tcg_gen_sub_i64(t, max, a); + + /* Constrain addend so that the next addition never overflows.
*/ + tcg_gen_umin_i64(t, t, b); + tcg_gen_add_i64(res, a, t); + + tcg_gen_xor_i64(t, t, b); + tcg_gen_or_i64(qc, qc, t); +} + +static void gen_suqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec max = + tcg_constant_vec_matching(t, vece, (1ull << ((8 << vece) - 1)) - 1); + TCGv_vec u = tcg_temp_new_vec_matching(t); + + /* Maximum value that can be added to @a without overflow. */ + tcg_gen_sub_vec(vece, u, max, a); + + /* Constrain addend so that the next addition never overflows. */ + tcg_gen_umin_vec(vece, u, u, b); + tcg_gen_add_vec(vece, t, u, a); + + /* Compute QC by comparing the adjusted @b. */ + tcg_gen_xor_vec(vece, u, u, b); + tcg_gen_or_vec(vece, qc, qc, u); +} + +void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_add_vec, INDEX_op_sub_vec, INDEX_op_umin_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_suqadd_vec, + .fno = gen_helper_gvec_suqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_suqadd_vec, + .fno = gen_helper_gvec_suqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_suqadd_vec, + .fno = gen_helper_gvec_suqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_suqadd_vec, + .fni8 = gen_suqadd_d, + .fno = gen_helper_gvec_suqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} + +void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz) +{ + TCGv_i64 max = tcg_constant_i64(MAKE_64BIT_MASK(0, 8 << esz)); + TCGv_i64 zero = tcg_constant_i64(0); + TCGv_i64 tmp = tcg_temp_new_i64(); + + tcg_gen_add_i64(tmp, a, b); + tcg_gen_smin_i64(res, tmp, max); + tcg_gen_smax_i64(res, res, zero); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b) +{ + TCGv_i64 tmp = tcg_temp_new_i64(); + TCGv_i64 tneg = tcg_temp_new_i64(); + TCGv_i64 tpos = tcg_temp_new_i64(); + TCGv_i64 max = tcg_constant_i64(UINT64_MAX); + TCGv_i64 zero = tcg_constant_i64(0); + + tcg_gen_add_i64(tmp, a, b); + + /* If @b is positive, saturate if (a + b) < a, aka unsigned overflow. */ + tcg_gen_movcond_i64(TCG_COND_LTU, tpos, tmp, a, max, tmp); + + /* If @b is negative, saturate if a < -b, ie subtraction is negative. */ + tcg_gen_neg_i64(tneg, b); + tcg_gen_movcond_i64(TCG_COND_LTU, tneg, a, tneg, zero, tmp); + + /* Select correct result from sign of @b. */ + tcg_gen_movcond_i64(TCG_COND_LT, res, b, zero, tneg, tpos); + tcg_gen_xor_i64(tmp, tmp, res); + tcg_gen_or_i64(qc, qc, tmp); +} + +static void gen_usqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec qc, + TCGv_vec a, TCGv_vec b) +{ + TCGv_vec u = tcg_temp_new_vec_matching(t); + TCGv_vec z = tcg_constant_vec_matching(t, vece, 0); + + /* Compute unsigned saturation of add for +b and sub for -b. */ + tcg_gen_neg_vec(vece, t, b); + tcg_gen_usadd_vec(vece, u, a, b); + tcg_gen_ussub_vec(vece, t, a, t); + + /* Select the correct result depending on the sign of b. */ + tcg_gen_cmpsel_vec(TCG_COND_LT, vece, t, b, z, t, u); + + /* Compute QC by comparing against the non-saturated result. 
*/ + tcg_gen_add_vec(vece, u, a, b); + tcg_gen_xor_vec(vece, u, u, t); + tcg_gen_or_vec(vece, qc, qc, u); +} + +void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz) +{ + static const TCGOpcode vecop_list[] = { + INDEX_op_neg_vec, INDEX_op_add_vec, + INDEX_op_usadd_vec, INDEX_op_ussub_vec, + INDEX_op_cmpsel_vec, 0 + }; + static const GVecGen4 ops[4] = { + { .fniv = gen_usqadd_vec, + .fno = gen_helper_gvec_usqadd_b, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_8 }, + { .fniv = gen_usqadd_vec, + .fno = gen_helper_gvec_usqadd_h, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_16 }, + { .fniv = gen_usqadd_vec, + .fno = gen_helper_gvec_usqadd_s, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_32 }, + { .fniv = gen_usqadd_vec, + .fni8 = gen_usqadd_d, + .fno = gen_helper_gvec_usqadd_d, + .opt_opc = vecop_list, + .write_aofs = true, + .vece = MO_64 }, + }; + + tcg_debug_assert(opr_sz <= sizeof_field(CPUARMState, vfp.qc)); + tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), + rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); +} diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 0518165399..371388f61b 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -132,3 +132,15 @@ DEF_HELPER_4(cpye, void, env, i32, i32, i32) DEF_HELPER_4(cpyfp, void, env, i32, i32, i32) DEF_HELPER_4(cpyfm, void, env, i32, i32, i32) DEF_HELPER_4(cpyfe, void, env, i32, i32, i32) + +DEF_HELPER_FLAGS_5(gvec_fdiv_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fdiv_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fdiv_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmulx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(gvec_fmulx_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c index 5da1b0fc1d..f03977b4b0 100644 --- a/target/arm/tcg/hflags.c +++ b/target/arm/tcg/hflags.c @@ -38,8 +38,16 @@ static bool aprofile_require_alignment(CPUARMState *env, int el, uint64_t sctlr) } /* - * If translation is disabled, then the default memory type is - * Device(-nGnRnE) instead of Normal, which requires that alignment + * With PMSA, when the MPU is disabled, all memory types in the + * default map are Normal, so don't need alignment enforcing. + */ + if (arm_feature(env, ARM_FEATURE_PMSA)) { + return false; + } + + /* + * With VMSA, if translation is disabled, then the default memory type + * is Device(-nGnRnE) instead of Normal, which requires that alignment * be enforced. Since this affects all ram, it is most efficient * to handle this during translation.
*/ diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c index d1f1e02acc..23d7f73035 100644 --- a/target/arm/tcg/m_helper.c +++ b/target/arm/tcg/m_helper.c @@ -16,6 +16,7 @@ #include "qemu/bitops.h" #include "qemu/log.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #ifdef CONFIG_TCG #include "exec/cpu_ldst.h" #include "semihosting/common-semi.h" diff --git a/target/arm/tcg/meson.build b/target/arm/tcg/meson.build index 3b1a9f0fc5..508932a249 100644 --- a/target/arm/tcg/meson.build +++ b/target/arm/tcg/meson.build @@ -24,6 +24,7 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: gen_a64) arm_ss.add(files( 'cpu32.c', + 'gengvec.c', 'translate.c', 'translate-m-nocp.c', 'translate-mve.c', @@ -42,6 +43,7 @@ arm_ss.add(files( arm_ss.add(when: 'TARGET_AARCH64', if_true: files( 'cpu64.c', + 'gengvec64.c', 'translate-a64.c', 'translate-sve.c', 'translate-sme.c', diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c index d971b81370..037ac6dd60 100644 --- a/target/arm/tcg/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -22,6 +22,7 @@ #include "cpu.h" #include "internals.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/ram_addr.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" diff --git a/target/arm/tcg/neon-dp.decode b/target/arm/tcg/neon-dp.decode index fd3a01bfa0..788578c8fa 100644 --- a/target/arm/tcg/neon-dp.decode +++ b/target/arm/tcg/neon-dp.decode @@ -102,37 +102,12 @@ VCGE_U_3s 1111 001 1 0 . .. .... .... 0011 . . . 1 .... @3same VSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 0 .... @3same_rev VSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 0 .... @3same_rev - -# Insns operating on 64-bit elements (size!=0b11 handled elsewhere) -# The _rev suffix indicates that Vn and Vm are reversed (as explained -# by the comment for the @3same_rev format). -@3same_64_rev .... ... . . . 11 .... .... .... . q:1 . . .... \ - &3same vm=%vn_dp vn=%vm_dp vd=%vd_dp size=3 - -{ - VQSHL_S64_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev - VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev -} -{ - VQSHL_U64_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_64_rev - VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev -} -{ - VRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev - VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev -} -{ - VRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_64_rev - VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev -} -{ - VQRSHL_S64_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev - VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev -} -{ - VQRSHL_U64_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_64_rev - VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev -} +VQSHL_S_3s 1111 001 0 0 . .. .... .... 0100 . . . 1 .... @3same_rev +VQSHL_U_3s 1111 001 1 0 . .. .... .... 0100 . . . 1 .... @3same_rev +VRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 0 .... @3same_rev +VRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 0 .... @3same_rev +VQRSHL_S_3s 1111 001 0 0 . .. .... .... 0101 . . . 1 .... @3same_rev +VQRSHL_U_3s 1111 001 1 0 . .. .... .... 0101 . . . 1 .... @3same_rev VMAX_S_3s 1111 001 0 0 . .. .... .... 0110 . . . 0 .... @3same VMAX_U_3s 1111 001 1 0 . .. .... .... 0110 . . . 0 .... 
@3same diff --git a/target/arm/tcg/neon_helper.c b/target/arm/tcg/neon_helper.c index bc6c4a54e9..082bfd88ad 100644 --- a/target/arm/tcg/neon_helper.c +++ b/target/arm/tcg/neon_helper.c @@ -6,10 +6,11 @@ * * This code is licensed under the GNU GPL v2. */ -#include "qemu/osdep.h" +#include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" #include "vec_internal.h" @@ -117,6 +118,29 @@ NEON_VOP_BODY(vtype, n) uint32_t HELPER(glue(neon_,name))(CPUARMState *env, uint32_t arg1, uint32_t arg2) \ NEON_VOP_BODY(vtype, n) +#define NEON_GVEC_VOP2(name, vtype) \ +void HELPER(name)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + vtype *d = vd, *n = vn, *m = vm; \ + for (i = 0; i < opr_sz / sizeof(vtype); i++) { \ + NEON_FN(d[i], n[i], m[i]); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + +#define NEON_GVEC_VOP2_ENV(name, vtype) \ +void HELPER(name)(void *vd, void *vn, void *vm, void *venv, uint32_t desc) \ +{ \ + intptr_t i, opr_sz = simd_oprsz(desc); \ + vtype *d = vd, *n = vn, *m = vm; \ + CPUARMState *env = venv; \ + for (i = 0; i < opr_sz / sizeof(vtype); i++) { \ + NEON_FN(d[i], n[i], m[i]); \ + } \ + clear_tail(d, opr_sz, simd_maxsz(desc)); \ +} + /* Pairwise operations. */ /* For 32-bit elements each segment only contains a single element, so the elementwise and pairwise operations are the same. */ @@ -155,414 +179,6 @@ uint32_t HELPER(glue(neon_,name))(uint32_t arg) \ return arg; \ } - -#define NEON_USAT(dest, src1, src2, type) do { \ - uint32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - dest = ~0; \ - } else { \ - dest = tmp; \ - }} while(0) -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) -NEON_VOP_ENV(qadd_u8, neon_u8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) -NEON_VOP_ENV(qadd_u16, neon_u16, 2) -#undef NEON_FN -#undef NEON_USAT - -uint32_t HELPER(neon_qadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a + b; - if (res < a) { - SET_QC(); - res = ~0; - } - return res; -} - -uint64_t HELPER(neon_qadd_u64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (res < src1) { - SET_QC(); - res = ~(uint64_t)0; - } - return res; -} - -#define NEON_SSAT(dest, src1, src2, type) do { \ - int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - if (src2 > 0) { \ - tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ - } else { \ - tmp = 1 << (sizeof(type) * 8 - 1); \ - } \ - } \ - dest = tmp; \ - } while(0) -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) -NEON_VOP_ENV(qadd_s8, neon_s8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) -NEON_VOP_ENV(qadd_s16, neon_s16, 2) -#undef NEON_FN -#undef NEON_SSAT - -uint32_t HELPER(neon_qadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a + b; - if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { - SET_QC(); - res = ~(((int32_t)a >> 31) ^ SIGNBIT); - } - return res; -} - -uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 + src2; - if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { - SET_QC(); - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -/* Unsigned saturating accumulate of signed value - * - * Op1/Rn is treated as signed - * Op2/Rd is treated as 
unsigned - * - * Explicit casting is used to ensure the correct sign extension of - * inputs. The result is treated as a unsigned value and saturated as such. - * - * We use a macro for the 8/16 bit cases which expects signed integers of va, - * vb, and vr for interim calculation and an unsigned 32 bit result value r. - */ - -#define USATACC(bits, shift) \ - do { \ - va = sextract32(a, shift, bits); \ - vb = extract32(b, shift, bits); \ - vr = va + vb; \ - if (vr > UINT##bits##_MAX) { \ - SET_QC(); \ - vr = UINT##bits##_MAX; \ - } else if (vr < 0) { \ - SET_QC(); \ - vr = 0; \ - } \ - r = deposit32(r, shift, bits, vr); \ - } while (0) - -uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int16_t va, vb, vr; - uint32_t r = 0; - - USATACC(8, 0); - USATACC(8, 8); - USATACC(8, 16); - USATACC(8, 24); - return r; -} - -uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int32_t va, vb, vr; - uint64_t r = 0; - - USATACC(16, 0); - USATACC(16, 16); - return r; -} - -#undef USATACC - -uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int64_t va = (int32_t)a; - int64_t vb = (uint32_t)b; - int64_t vr = va + vb; - if (vr > UINT32_MAX) { - SET_QC(); - vr = UINT32_MAX; - } else if (vr < 0) { - SET_QC(); - vr = 0; - } - return vr; -} - -uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b) -{ - uint64_t res; - res = a + b; - /* We only need to look at the pattern of SIGN bits to detect - * +ve/-ve saturation - */ - if (~a & b & ~res & SIGNBIT64) { - SET_QC(); - res = UINT64_MAX; - } else if (a & ~b & res & SIGNBIT64) { - SET_QC(); - res = 0; - } - return res; -} - -/* Signed saturating accumulate of unsigned value - * - * Op1/Rn is treated as unsigned - * Op2/Rd is treated as signed - * - * The result is treated as a signed value and saturated as such - * - * We use a macro for the 8/16 bit cases which expects signed integers of va, - * vb, and vr for interim calculation and an unsigned 32 bit result value r. 
- */ - -#define SSATACC(bits, shift) \ - do { \ - va = extract32(a, shift, bits); \ - vb = sextract32(b, shift, bits); \ - vr = va + vb; \ - if (vr > INT##bits##_MAX) { \ - SET_QC(); \ - vr = INT##bits##_MAX; \ - } else if (vr < INT##bits##_MIN) { \ - SET_QC(); \ - vr = INT##bits##_MIN; \ - } \ - r = deposit32(r, shift, bits, vr); \ - } while (0) - -uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int16_t va, vb, vr; - uint32_t r = 0; - - SSATACC(8, 0); - SSATACC(8, 8); - SSATACC(8, 16); - SSATACC(8, 24); - return r; -} - -uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int32_t va, vb, vr; - uint32_t r = 0; - - SSATACC(16, 0); - SSATACC(16, 16); - - return r; -} - -#undef SSATACC - -uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - int64_t res; - int64_t op1 = (uint32_t)a; - int64_t op2 = (int32_t)b; - res = op1 + op2; - if (res > INT32_MAX) { - SET_QC(); - res = INT32_MAX; - } else if (res < INT32_MIN) { - SET_QC(); - res = INT32_MIN; - } - return res; -} - -uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b) -{ - uint64_t res; - res = a + b; - /* We only need to look at the pattern of SIGN bits to detect an overflow */ - if (((a & res) - | (~b & res) - | (a & ~b)) & SIGNBIT64) { - SET_QC(); - res = INT64_MAX; - } - return res; -} - - -#define NEON_USAT(dest, src1, src2, type) do { \ - uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - dest = 0; \ - } else { \ - dest = tmp; \ - }} while(0) -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint8_t) -NEON_VOP_ENV(qsub_u8, neon_u8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_USAT(dest, src1, src2, uint16_t) -NEON_VOP_ENV(qsub_u16, neon_u16, 2) -#undef NEON_FN -#undef NEON_USAT - -uint32_t HELPER(neon_qsub_u32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a - b; - if (res > a) { - SET_QC(); - res = 0; - } - return res; -} - -uint64_t HELPER(neon_qsub_u64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - if (src1 < src2) { - SET_QC(); - res = 0; - } else { - res = src1 - src2; - } - return res; -} - -#define NEON_SSAT(dest, src1, src2, type) do { \ - int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ - if (tmp != (type)tmp) { \ - SET_QC(); \ - if (src2 < 0) { \ - tmp = (1 << (sizeof(type) * 8 - 1)) - 1; \ - } else { \ - tmp = 1 << (sizeof(type) * 8 - 1); \ - } \ - } \ - dest = tmp; \ - } while(0) -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int8_t) -NEON_VOP_ENV(qsub_s8, neon_s8, 4) -#undef NEON_FN -#define NEON_FN(dest, src1, src2) NEON_SSAT(dest, src1, src2, int16_t) -NEON_VOP_ENV(qsub_s16, neon_s16, 2) -#undef NEON_FN -#undef NEON_SSAT - -uint32_t HELPER(neon_qsub_s32)(CPUARMState *env, uint32_t a, uint32_t b) -{ - uint32_t res = a - b; - if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { - SET_QC(); - res = ~(((int32_t)a >> 31) ^ SIGNBIT); - } - return res; -} - -uint64_t HELPER(neon_qsub_s64)(CPUARMState *env, uint64_t src1, uint64_t src2) -{ - uint64_t res; - - res = src1 - src2; - if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { - SET_QC(); - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; - } - return res; -} - -#define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 -NEON_VOP(hadd_s8, neon_s8, 4) -NEON_VOP(hadd_u8, neon_u8, 4) -NEON_VOP(hadd_s16, neon_s16, 2) -NEON_VOP(hadd_u16, neon_u16, 2) -#undef NEON_FN - -int32_t HELPER(neon_hadd_s32)(int32_t src1, int32_t src2) -{ - int32_t dest; - 
- dest = (src1 >> 1) + (src2 >> 1); - if (src1 & src2 & 1) - dest++; - return dest; -} - -uint32_t HELPER(neon_hadd_u32)(uint32_t src1, uint32_t src2) -{ - uint32_t dest; - - dest = (src1 >> 1) + (src2 >> 1); - if (src1 & src2 & 1) - dest++; - return dest; -} - -#define NEON_FN(dest, src1, src2) dest = (src1 + src2 + 1) >> 1 -NEON_VOP(rhadd_s8, neon_s8, 4) -NEON_VOP(rhadd_u8, neon_u8, 4) -NEON_VOP(rhadd_s16, neon_s16, 2) -NEON_VOP(rhadd_u16, neon_u16, 2) -#undef NEON_FN - -int32_t HELPER(neon_rhadd_s32)(int32_t src1, int32_t src2) -{ - int32_t dest; - - dest = (src1 >> 1) + (src2 >> 1); - if ((src1 | src2) & 1) - dest++; - return dest; -} - -uint32_t HELPER(neon_rhadd_u32)(uint32_t src1, uint32_t src2) -{ - uint32_t dest; - - dest = (src1 >> 1) + (src2 >> 1); - if ((src1 | src2) & 1) - dest++; - return dest; -} - -#define NEON_FN(dest, src1, src2) dest = (src1 - src2) >> 1 -NEON_VOP(hsub_s8, neon_s8, 4) -NEON_VOP(hsub_u8, neon_u8, 4) -NEON_VOP(hsub_s16, neon_s16, 2) -NEON_VOP(hsub_u16, neon_u16, 2) -#undef NEON_FN - -int32_t HELPER(neon_hsub_s32)(int32_t src1, int32_t src2) -{ - int32_t dest; - - dest = (src1 >> 1) - (src2 >> 1); - if ((~src1) & src2 & 1) - dest--; - return dest; -} - -uint32_t HELPER(neon_hsub_u32)(uint32_t src1, uint32_t src2) -{ - uint32_t dest; - - dest = (src1 >> 1) - (src2 >> 1); - if ((~src1) & src2 & 1) - dest--; - return dest; -} - #define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2 NEON_POP(pmin_s8, neon_s8, 4) NEON_POP(pmin_u8, neon_u8, 4) @@ -590,11 +206,23 @@ NEON_VOP(shl_s16, neon_s16, 2) #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, NULL)) NEON_VOP(rshl_s8, neon_s8, 4) +NEON_GVEC_VOP2(gvec_srshl_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, NULL)) NEON_VOP(rshl_s16, neon_s16, 2) +NEON_GVEC_VOP2(gvec_srshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) +NEON_GVEC_VOP2(gvec_srshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, (int8_t)src2, true, NULL)) +NEON_GVEC_VOP2(gvec_srshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_rshl_s32)(uint32_t val, uint32_t shift) @@ -610,11 +238,23 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, NULL)) NEON_VOP(rshl_u8, neon_u8, 4) +NEON_GVEC_VOP2(gvec_urshl_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, NULL)) NEON_VOP(rshl_u16, neon_u16, 2) +NEON_GVEC_VOP2(gvec_urshl_h, uint16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, NULL)) +NEON_GVEC_VOP2(gvec_urshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, (int8_t)src2, true, NULL)) +NEON_GVEC_VOP2(gvec_urshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_rshl_u32)(uint32_t val, uint32_t shift) @@ -630,11 +270,23 @@ uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u8, neon_u8, 4) +NEON_GVEC_VOP2_ENV(neon_uqshl_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_u16, neon_u16, 2) +NEON_GVEC_VOP2_ENV(neon_uqshl_h, uint16_t) 
+#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqshl_s, uint32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqshl_d, uint64_t) #undef NEON_FN uint32_t HELPER(neon_qshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -650,11 +302,23 @@ uint64_t HELPER(neon_qshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s8, neon_s8, 4) +NEON_GVEC_VOP2_ENV(neon_sqshl_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, false, env->vfp.qc)) NEON_VOP_ENV(qshl_s16, neon_s16, 2) +NEON_GVEC_VOP2_ENV(neon_sqshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, (int8_t)src2, false, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_qshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -690,11 +354,23 @@ uint64_t HELPER(neon_qshlu_s64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u8, neon_u8, 4) +NEON_GVEC_VOP2_ENV(neon_uqrshl_b, uint8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_uqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_u16, neon_u16, 2) +NEON_GVEC_VOP2_ENV(neon_uqrshl_h, uint16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_bhs(src1, (int8_t)src2, 32, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqrshl_s, uint32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_uqrshl_d(src1, (int8_t)src2, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_uqrshl_d, uint64_t) #undef NEON_FN uint32_t HELPER(neon_qrshl_u32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -710,11 +386,23 @@ uint64_t HELPER(neon_qrshl_u64)(CPUARMState *env, uint64_t val, uint64_t shift) #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 8, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_s8, neon_s8, 4) +NEON_GVEC_VOP2_ENV(neon_sqrshl_b, int8_t) #undef NEON_FN #define NEON_FN(dest, src1, src2) \ (dest = do_sqrshl_bhs(src1, (int8_t)src2, 16, true, env->vfp.qc)) NEON_VOP_ENV(qrshl_s16, neon_s16, 2) +NEON_GVEC_VOP2_ENV(neon_sqrshl_h, int16_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_bhs(src1, (int8_t)src2, 32, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqrshl_s, int32_t) +#undef NEON_FN + +#define NEON_FN(dest, src1, src2) \ + (dest = do_sqrshl_d(src1, (int8_t)src2, true, env->vfp.qc)) +NEON_GVEC_VOP2_ENV(neon_sqrshl_d, int64_t) #undef NEON_FN uint32_t HELPER(neon_qrshl_s32)(CPUARMState *env, uint32_t val, uint32_t shift) @@ -745,11 +433,6 @@ uint32_t HELPER(neon_add_u16)(uint32_t a, uint32_t b) return (a + b) ^ mask; } -#define NEON_FN(dest, src1, src2) dest = src1 + src2 -NEON_POP(padd_u8, neon_u8, 4) -NEON_POP(padd_u16, neon_u16, 2) -#undef NEON_FN - #define NEON_FN(dest, src1, src2) dest = src1 - src2 NEON_VOP(sub_u8, neon_u8, 4) NEON_VOP(sub_u16, neon_u16, 2) diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index 
c199b69fbf..c083e5cfb8 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -409,6 +409,60 @@ void HELPER(wfi)(CPUARMState *env, uint32_t insn_len) #endif } +void HELPER(wfit)(CPUARMState *env, uint64_t timeout) +{ +#ifdef CONFIG_USER_ONLY + /* + * WFI in the user-mode emulator is technically permitted but not + * something any real-world code would do. AArch64 Linux kernels + * trap it via SCTRL_EL1.nTWI and make it an (expensive) NOP; + * AArch32 kernels don't trap it so it will delay a bit. + * For QEMU, make it NOP here, because trying to raise EXCP_HLT + * would trigger an abort. + */ + return; +#else + ARMCPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + int target_el = check_wfx_trap(env, false); + /* The WFIT should time out when CNTVCT_EL0 >= the specified value. */ + uint64_t cntval = gt_get_countervalue(env); + uint64_t offset = gt_virt_cnt_offset(env); + uint64_t cntvct = cntval - offset; + uint64_t nexttick; + + if (cpu_has_work(cs) || cntvct >= timeout) { + /* + * Don't bother to go into our "low power state" if + * we would just wake up immediately. + */ + return; + } + + if (target_el) { + env->pc -= 4; + raise_exception(env, EXCP_UDEF, syn_wfx(1, 0xe, 0, false), + target_el); + } + + if (uadd64_overflow(timeout, offset, &nexttick)) { + nexttick = UINT64_MAX; + } + if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) { + /* + * If the timeout is too long for the signed 64-bit range + * of a QEMUTimer, let it expire early. + */ + timer_mod_ns(cpu->wfxt_timer, INT64_MAX); + } else { + timer_mod(cpu->wfxt_timer, nexttick); + } + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +#endif +} + void HELPER(wfe)(CPUARMState *env) { /* This is a hint instruction that is semantically different diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c index 6853f58c19..dd49e67d7a 100644 --- a/target/arm/tcg/sve_helper.c +++ b/target/arm/tcg/sve_helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "internals.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/helper-proto.h" #include "tcg/tcg-gvec-desc.h" #include "fpu/softfloat.h" diff --git a/target/arm/tcg/t32.decode b/target/arm/tcg/t32.decode index f21ad0167a..d327178829 100644 --- a/target/arm/tcg/t32.decode +++ b/target/arm/tcg/t32.decode @@ -458,41 +458,41 @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos # Note that Load, unsigned (literal) overlaps all other load encodings. { { - NOP 1111 1000 -001 1111 1111 ------------ # PLD + PLD 1111 1000 -001 1111 1111 ------------ # (literal) LDRB_ri 1111 1000 .001 1111 .... ............ @ldst_ri_lit } { - NOP 1111 1000 1001 ---- 1111 ------------ # PLD + PLD 1111 1000 1001 ---- 1111 ------------ # (immediate T1) LDRB_ri 1111 1000 1001 .... .... ............ @ldst_ri_pos } LDRB_ri 1111 1000 0001 .... .... 1..1 ........ @ldst_ri_idx { - NOP 1111 1000 0001 ---- 1111 1100 -------- # PLD + PLD 1111 1000 0001 ---- 1111 1100 -------- # (immediate T2) LDRB_ri 1111 1000 0001 .... .... 1100 ........ @ldst_ri_neg } LDRBT_ri 1111 1000 0001 .... .... 1110 ........ @ldst_ri_unp { - NOP 1111 1000 0001 ---- 1111 000000 -- ---- # PLD + PLD 1111 1000 0001 ---- 1111 000000 -- ---- # (register) LDRB_rr 1111 1000 0001 .... .... 000000 .. .... @ldst_rr } } { { - NOP 1111 1000 -011 1111 1111 ------------ # PLD + PLD 1111 1000 -011 1111 1111 ------------ # (literal) LDRH_ri 1111 1000 .011 1111 .... ............ 
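/*
 * Illustrative sketch of the deadline arithmetic in HELPER(wfit) above:
 * the absolute CNTVCT_EL0 timeout is rebased by the virtual counter offset
 * with a saturating add, then clamped so the nanosecond expiry still fits
 * the signed 64-bit range of a QEMUTimer. Standalone example with invented
 * names; the real helper hands the result to timer_mod()/timer_mod_ns().
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t sat_add_u64(uint64_t a, uint64_t b)   /* like the uadd64_overflow() use */
{
    uint64_t r = a + b;
    return r < a ? UINT64_MAX : r;
}

static int64_t wfit_deadline_sketch(uint64_t timeout, uint64_t offset,
                                    uint64_t period_ns /* must be > 0 */)
{
    uint64_t nexttick = sat_add_u64(timeout, offset);

    if (nexttick > (uint64_t)(INT64_MAX / period_ns)) {
        return INT64_MAX;          /* too far out for the timer: expire "early" */
    }
    return (int64_t)(nexttick * period_ns);
}

int main(void)
{
    printf("%lld\n", (long long)wfit_deadline_sketch(1000, 16, 10));        /* 10160 */
    printf("%lld\n", (long long)wfit_deadline_sketch(UINT64_MAX, 1, 10));   /* clamped */
    return 0;
}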
@ldst_ri_lit } { - NOP 1111 1000 1011 ---- 1111 ------------ # PLDW + PLDW 1111 1000 1011 ---- 1111 ------------ # (immediate T1) LDRH_ri 1111 1000 1011 .... .... ............ @ldst_ri_pos } LDRH_ri 1111 1000 0011 .... .... 1..1 ........ @ldst_ri_idx { - NOP 1111 1000 0011 ---- 1111 1100 -------- # PLDW + PLDW 1111 1000 0011 ---- 1111 1100 -------- # (immediate T2) LDRH_ri 1111 1000 0011 .... .... 1100 ........ @ldst_ri_neg } LDRHT_ri 1111 1000 0011 .... .... 1110 ........ @ldst_ri_unp { - NOP 1111 1000 0011 ---- 1111 000000 -- ---- # PLDW + PLDW 1111 1000 0011 ---- 1111 000000 -- ---- # (register) LDRH_rr 1111 1000 0011 .... .... 000000 .. .... @ldst_rr } } @@ -504,24 +504,23 @@ STR_ri 1111 1000 1100 .... .... ............ @ldst_ri_pos LDRT_ri 1111 1000 0101 .... .... 1110 ........ @ldst_ri_unp LDR_rr 1111 1000 0101 .... .... 000000 .. .... @ldst_rr } -# NOPs here are PLI. { { - NOP 1111 1001 -001 1111 1111 ------------ + PLI 1111 1001 -001 1111 1111 ------------ # (literal T3) LDRSB_ri 1111 1001 .001 1111 .... ............ @ldst_ri_lit } { - NOP 1111 1001 1001 ---- 1111 ------------ + PLI 1111 1001 1001 ---- 1111 ------------ # (immediate T1) LDRSB_ri 1111 1001 1001 .... .... ............ @ldst_ri_pos } LDRSB_ri 1111 1001 0001 .... .... 1..1 ........ @ldst_ri_idx { - NOP 1111 1001 0001 ---- 1111 1100 -------- + PLI 1111 1001 0001 ---- 1111 1100 -------- # (immediate T2) LDRSB_ri 1111 1001 0001 .... .... 1100 ........ @ldst_ri_neg } LDRSBT_ri 1111 1001 0001 .... .... 1110 ........ @ldst_ri_unp { - NOP 1111 1001 0001 ---- 1111 000000 -- ---- + PLI 1111 1001 0001 ---- 1111 000000 -- ---- # (register) LDRSB_rr 1111 1001 0001 .... .... 000000 .. .... @ldst_rr } } diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 976094a5c8..93543da39c 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -22,7 +22,6 @@ #include "translate.h" #include "translate-a64.h" #include "qemu/log.h" -#include "disas/disas.h" #include "arm_ldst.h" #include "semihosting/semihost.h" #include "cpregs.h" @@ -725,19 +724,6 @@ static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn, is_q ? 16 : 8, vec_full_reg_size(s), data, fn); } -/* Expand a 3-operand + qc + operation using an out-of-line helper. */ -static void gen_gvec_op3_qc(DisasContext *s, bool is_q, int rd, int rn, - int rm, gen_helper_gvec_3_ptr *fn) -{ - TCGv_ptr qc_ptr = tcg_temp_new_ptr(); - - tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), qc_ptr, - is_q ? 16 : 8, vec_full_reg_size(s), 0, fn); -} - /* Expand a 4-operand operation using an out-of-line helper. */ static void gen_gvec_op4_ool(DisasContext *s, bool is_q, int rd, int rn, int rm, int ra, int data, gen_helper_gvec_4 *fn) @@ -1315,6 +1301,75 @@ bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req) } /* + * Expanders for AdvSIMD translation functions. 
+ */ + +static bool do_gvec_op2_ool(DisasContext *s, arg_qrr_e *a, int data, + gen_helper_gvec_2 *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_op2_ool(s, a->q, a->rd, a->rn, data, fn); + } + return true; +} + +static bool do_gvec_op3_ool(DisasContext *s, arg_qrrr_e *a, int data, + gen_helper_gvec_3 *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, data, fn); + } + return true; +} + +static bool do_gvec_fn3(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); + } + return true; +} + +static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) +{ + if (a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn3(s, a->q, a->rd, a->rn, a->rm, fn, a->esz); + } + return true; +} + +static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn) +{ + if (a->esz == MO_8) { + return false; + } + return do_gvec_fn3_no64(s, a, fn); +} + +static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn) +{ + if (!a->q && a->esz == MO_64) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_fn4(s, a->q, a->rd, a->rn, a->rm, a->ra, fn, a->esz); + } + return true; +} + +/* * This utility function is for doing register extension with an * optional shift. You will likely want to pass a temporary for the * destination register. See DecodeRegExtend() in the ARM ARM. @@ -1690,6 +1745,47 @@ static bool trans_WFE(DisasContext *s, arg_WFI *a) return true; } +static bool trans_WFIT(DisasContext *s, arg_WFIT *a) +{ + if (!dc_isar_feature(aa64_wfxt, s)) { + return false; + } + + /* + * Because we need to pass the register value to the helper, + * it's easier to emit the code now, unlike trans_WFI which + * defers it to aarch64_tr_tb_stop(). That means we need to + * check ss_active so that single-stepping a WFIT doesn't halt. + */ + if (s->ss_active) { + /* Act like a NOP under architectural singlestep */ + return true; + } + + gen_a64_update_pc(s, 4); + gen_helper_wfit(tcg_env, cpu_reg(s, a->rd)); + /* Go back to the main loop to check for interrupts */ + s->base.is_jmp = DISAS_EXIT; + return true; +} + +static bool trans_WFET(DisasContext *s, arg_WFET *a) +{ + if (!dc_isar_feature(aa64_wfxt, s)) { + return false; + } + + /* + * We rely here on our WFE implementation being a NOP, so we + * don't need to do anything different to handle the WFET timeout + * from what trans_WFE does. 
+ */ + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { + s->base.is_jmp = DISAS_WFE; + } + return true; +} + static bool trans_XPACLRI(DisasContext *s, arg_XPACLRI *a) { if (s->pauth_active) { @@ -4561,6 +4657,1382 @@ static bool trans_EXTR(DisasContext *s, arg_extract *a) return true; } +/* + * Cryptographic AES, SHA, SHA512 + */ + +TRANS_FEAT(AESE, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aese) +TRANS_FEAT(AESD, aa64_aes, do_gvec_op3_ool, a, 0, gen_helper_crypto_aesd) +TRANS_FEAT(AESMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesmc) +TRANS_FEAT(AESIMC, aa64_aes, do_gvec_op2_ool, a, 0, gen_helper_crypto_aesimc) + +TRANS_FEAT(SHA1C, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1c) +TRANS_FEAT(SHA1P, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1p) +TRANS_FEAT(SHA1M, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1m) +TRANS_FEAT(SHA1SU0, aa64_sha1, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha1su0) + +TRANS_FEAT(SHA256H, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h) +TRANS_FEAT(SHA256H2, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256h2) +TRANS_FEAT(SHA256SU1, aa64_sha256, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha256su1) + +TRANS_FEAT(SHA1H, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1h) +TRANS_FEAT(SHA1SU1, aa64_sha1, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha1su1) +TRANS_FEAT(SHA256SU0, aa64_sha256, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha256su0) + +TRANS_FEAT(SHA512H, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h) +TRANS_FEAT(SHA512H2, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512h2) +TRANS_FEAT(SHA512SU1, aa64_sha512, do_gvec_op3_ool, a, 0, gen_helper_crypto_sha512su1) +TRANS_FEAT(RAX1, aa64_sha3, do_gvec_fn3, a, gen_gvec_rax1) +TRANS_FEAT(SM3PARTW1, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw1) +TRANS_FEAT(SM3PARTW2, aa64_sm3, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm3partw2) +TRANS_FEAT(SM4EKEY, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4ekey) + +TRANS_FEAT(SHA512SU0, aa64_sha512, do_gvec_op2_ool, a, 0, gen_helper_crypto_sha512su0) +TRANS_FEAT(SM4E, aa64_sm4, do_gvec_op3_ool, a, 0, gen_helper_crypto_sm4e) + +TRANS_FEAT(EOR3, aa64_sha3, do_gvec_fn4, a, gen_gvec_eor3) +TRANS_FEAT(BCAX, aa64_sha3, do_gvec_fn4, a, gen_gvec_bcax) + +static bool trans_SM3SS1(DisasContext *s, arg_SM3SS1 *a) +{ + if (!dc_isar_feature(aa64_sm3, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 tcg_op1 = tcg_temp_new_i32(); + TCGv_i32 tcg_op2 = tcg_temp_new_i32(); + TCGv_i32 tcg_op3 = tcg_temp_new_i32(); + TCGv_i32 tcg_res = tcg_temp_new_i32(); + unsigned vsz, dofs; + + read_vec_element_i32(s, tcg_op1, a->rn, 3, MO_32); + read_vec_element_i32(s, tcg_op2, a->rm, 3, MO_32); + read_vec_element_i32(s, tcg_op3, a->ra, 3, MO_32); + + tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); + tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); + tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); + tcg_gen_rotri_i32(tcg_res, tcg_res, 25); + + /* Clear the whole register first, then store bits [127:96]. 
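/*
 * Illustrative sketch, not QEMU code: the scalar arithmetic that the TCG
 * sequence for SM3SS1 above generates. Rotating right by 20 and then by 25
 * is the SM3 SS1 step ROL(ROL(a, 12) + e + t, 7) expressed with right
 * rotations. Names are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t ror32(uint32_t x, unsigned n)   /* n must be 1..31 */
{
    return (x >> n) | (x << (32 - n));
}

static uint32_t sm3ss1_sketch(uint32_t op1, uint32_t op2, uint32_t op3)
{
    return ror32(ror32(op1, 20) + op2 + op3, 25);
}

int main(void)
{
    printf("0x%08x\n", sm3ss1_sketch(0x12345678, 0x9abcdef0, 0x0f1e2d3c));
    return 0;
}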
*/ + vsz = vec_full_reg_size(s); + dofs = vec_full_reg_offset(s, a->rd); + tcg_gen_gvec_dup_imm(MO_64, dofs, vsz, vsz, 0); + write_vec_element_i32(s, tcg_res, a->rd, 3, MO_32); + } + return true; +} + +static bool do_crypto3i(DisasContext *s, arg_crypto3i *a, gen_helper_gvec_3 *fn) +{ + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, true, a->rd, a->rn, a->rm, a->imm, fn); + } + return true; +} +TRANS_FEAT(SM3TT1A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1a) +TRANS_FEAT(SM3TT1B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt1b) +TRANS_FEAT(SM3TT2A, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2a) +TRANS_FEAT(SM3TT2B, aa64_sm3, do_crypto3i, a, gen_helper_crypto_sm3tt2b) + +static bool trans_XAR(DisasContext *s, arg_XAR *a) +{ + if (!dc_isar_feature(aa64_sha3, s)) { + return false; + } + if (fp_access_check(s)) { + gen_gvec_xar(MO_64, vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), a->imm, 16, + vec_full_reg_size(s)); + } + return true; +} + +/* + * Advanced SIMD copy + */ + +static bool decode_esz_idx(int imm, MemOp *pesz, unsigned *pidx) +{ + unsigned esz = ctz32(imm); + if (esz <= MO_64) { + *pesz = esz; + *pidx = imm >> (esz + 1); + return true; + } + return false; +} + +static bool trans_DUP_element_s(DisasContext *s, arg_DUP_element_s *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (fp_access_check(s)) { + /* + * This instruction just extracts the specified element and + * zero-extends it into the bottom of the destination register. + */ + TCGv_i64 tmp = tcg_temp_new_i64(); + read_vec_element(s, tmp, a->rn, idx, esz); + write_fp_dreg(s, a->rd, tmp); + } + return true; +} + +static bool trans_DUP_element_v(DisasContext *s, arg_DUP_element_v *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (esz == MO_64 && !a->q) { + return false; + } + if (fp_access_check(s)) { + tcg_gen_gvec_dup_mem(esz, vec_full_reg_offset(s, a->rd), + vec_reg_offset(s, a->rn, idx, esz), + a->q ? 16 : 8, vec_full_reg_size(s)); + } + return true; +} + +static bool trans_DUP_general(DisasContext *s, arg_DUP_general *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (esz == MO_64 && !a->q) { + return false; + } + if (fp_access_check(s)) { + tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), + a->q ? 16 : 8, vec_full_reg_size(s), + cpu_reg(s, a->rn)); + } + return true; +} + +static bool do_smov_umov(DisasContext *s, arg_SMOV *a, MemOp is_signed) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (is_signed) { + if (esz == MO_64 || (esz == MO_32 && !a->q)) { + return false; + } + } else { + if (esz == MO_64 ? 
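/*
 * Illustrative sketch of the imm5 decode performed by decode_esz_idx()
 * above for the AdvSIMD copy group: the element size is LowestSetBit(imm5)
 * (at most MO_64) and the index is the bits above it. Standalone example
 * using the GCC/Clang __builtin_ctz builtin.
 */
#include <stdbool.h>
#include <stdio.h>

static bool esz_idx_sketch(unsigned imm, unsigned *esz, unsigned *idx)
{
    unsigned e = __builtin_ctz(imm);    /* imm must be non-zero */

    if (e > 3) {                        /* only MO_8..MO_64 are valid */
        return false;
    }
    *esz = e;
    *idx = imm >> (e + 1);
    return true;
}

int main(void)
{
    unsigned esz, idx;

    if (esz_idx_sketch(0x12, &esz, &idx)) {     /* imm5 = 0b10010 */
        printf("esz=%u idx=%u\n", esz, idx);    /* esz=1 (16-bit lane), idx=4 */
    }
    return 0;
}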
!a->q : a->q) { + return false; + } + } + if (fp_access_check(s)) { + TCGv_i64 tcg_rd = cpu_reg(s, a->rd); + read_vec_element(s, tcg_rd, a->rn, idx, esz | is_signed); + if (is_signed && !a->q) { + tcg_gen_ext32u_i64(tcg_rd, tcg_rd); + } + } + return true; +} + +TRANS(SMOV, do_smov_umov, a, MO_SIGN) +TRANS(UMOV, do_smov_umov, a, 0) + +static bool trans_INS_general(DisasContext *s, arg_INS_general *a) +{ + MemOp esz; + unsigned idx; + + if (!decode_esz_idx(a->imm, &esz, &idx)) { + return false; + } + if (fp_access_check(s)) { + write_vec_element(s, cpu_reg(s, a->rn), a->rd, idx, esz); + clear_vec_high(s, true, a->rd); + } + return true; +} + +static bool trans_INS_element(DisasContext *s, arg_INS_element *a) +{ + MemOp esz; + unsigned didx, sidx; + + if (!decode_esz_idx(a->di, &esz, &didx)) { + return false; + } + sidx = a->si >> esz; + if (fp_access_check(s)) { + TCGv_i64 tmp = tcg_temp_new_i64(); + + read_vec_element(s, tmp, a->rn, sidx, esz); + write_vec_element(s, tmp, a->rd, didx, esz); + + /* INS is considered a 128-bit write for SVE. */ + clear_vec_high(s, true, a->rd); + } + return true; +} + +/* + * Advanced SIMD three same + */ + +typedef struct FPScalar { + void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); + void (*gen_s)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); + void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr); +} FPScalar; + +static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = read_fp_dreg(s, a->rm); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rn); + TCGv_i32 t1 = read_fp_sreg(s, a->rm); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rn); + TCGv_i32 t1 = read_fp_hreg(s, a->rm); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + return false; + } + return true; +} + +static const FPScalar f_scalar_fadd = { + gen_helper_vfp_addh, + gen_helper_vfp_adds, + gen_helper_vfp_addd, +}; +TRANS(FADD_s, do_fp3_scalar, a, &f_scalar_fadd) + +static const FPScalar f_scalar_fsub = { + gen_helper_vfp_subh, + gen_helper_vfp_subs, + gen_helper_vfp_subd, +}; +TRANS(FSUB_s, do_fp3_scalar, a, &f_scalar_fsub) + +static const FPScalar f_scalar_fdiv = { + gen_helper_vfp_divh, + gen_helper_vfp_divs, + gen_helper_vfp_divd, +}; +TRANS(FDIV_s, do_fp3_scalar, a, &f_scalar_fdiv) + +static const FPScalar f_scalar_fmul = { + gen_helper_vfp_mulh, + gen_helper_vfp_muls, + gen_helper_vfp_muld, +}; +TRANS(FMUL_s, do_fp3_scalar, a, &f_scalar_fmul) + +static const FPScalar f_scalar_fmax = { + gen_helper_advsimd_maxh, + gen_helper_vfp_maxs, + gen_helper_vfp_maxd, +}; +TRANS(FMAX_s, do_fp3_scalar, a, &f_scalar_fmax) + +static const FPScalar f_scalar_fmin = { + gen_helper_advsimd_minh, + gen_helper_vfp_mins, + gen_helper_vfp_mind, +}; +TRANS(FMIN_s, do_fp3_scalar, a, &f_scalar_fmin) + +static const FPScalar f_scalar_fmaxnm = { + gen_helper_advsimd_maxnumh, + gen_helper_vfp_maxnums, + gen_helper_vfp_maxnumd, +}; +TRANS(FMAXNM_s, do_fp3_scalar, a, &f_scalar_fmaxnm) + +static const FPScalar f_scalar_fminnm = { + gen_helper_advsimd_minnumh, + gen_helper_vfp_minnums, + gen_helper_vfp_minnumd, +}; 
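/*
 * Illustrative sketch, outside QEMU, of the FPScalar pattern used above and
 * wired up by the TRANS() lines around it: each instruction supplies a small
 * table of per-element-size callbacks and one shared dispatcher picks the
 * right one. Types and names below are invented for the example.
 */
#include <stdio.h>

typedef struct {
    float  (*gen_s)(float, float);
    double (*gen_d)(double, double);
} FPPairSketch;

static float  add_s(float a, float b)   { return a + b; }
static double add_d(double a, double b) { return a + b; }

static const FPPairSketch f_fadd_sketch = { add_s, add_d };

static double do_fp3_sketch(const FPPairSketch *f, int esz, double n, double m)
{
    /* esz follows the MemOp convention in the patch: 2 = 32-bit, 3 = 64-bit */
    return esz == 3 ? f->gen_d(n, m) : (double)f->gen_s((float)n, (float)m);
}

int main(void)
{
    printf("%g %g\n", do_fp3_sketch(&f_fadd_sketch, 2, 1.5, 2.25),
                      do_fp3_sketch(&f_fadd_sketch, 3, 1.5, 2.25));
    return 0;
}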
+TRANS(FMINNM_s, do_fp3_scalar, a, &f_scalar_fminnm) + +static const FPScalar f_scalar_fmulx = { + gen_helper_advsimd_mulxh, + gen_helper_vfp_mulxs, + gen_helper_vfp_mulxd, +}; +TRANS(FMULX_s, do_fp3_scalar, a, &f_scalar_fmulx) + +static void gen_fnmul_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_mulh(d, n, m, s); + gen_vfp_negh(d, d); +} + +static void gen_fnmul_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_muls(d, n, m, s); + gen_vfp_negs(d, d); +} + +static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_muld(d, n, m, s); + gen_vfp_negd(d, d); +} + +static const FPScalar f_scalar_fnmul = { + gen_fnmul_h, + gen_fnmul_s, + gen_fnmul_d, +}; +TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul) + +static const FPScalar f_scalar_fcmeq = { + gen_helper_advsimd_ceq_f16, + gen_helper_neon_ceq_f32, + gen_helper_neon_ceq_f64, +}; +TRANS(FCMEQ_s, do_fp3_scalar, a, &f_scalar_fcmeq) + +static const FPScalar f_scalar_fcmge = { + gen_helper_advsimd_cge_f16, + gen_helper_neon_cge_f32, + gen_helper_neon_cge_f64, +}; +TRANS(FCMGE_s, do_fp3_scalar, a, &f_scalar_fcmge) + +static const FPScalar f_scalar_fcmgt = { + gen_helper_advsimd_cgt_f16, + gen_helper_neon_cgt_f32, + gen_helper_neon_cgt_f64, +}; +TRANS(FCMGT_s, do_fp3_scalar, a, &f_scalar_fcmgt) + +static const FPScalar f_scalar_facge = { + gen_helper_advsimd_acge_f16, + gen_helper_neon_acge_f32, + gen_helper_neon_acge_f64, +}; +TRANS(FACGE_s, do_fp3_scalar, a, &f_scalar_facge) + +static const FPScalar f_scalar_facgt = { + gen_helper_advsimd_acgt_f16, + gen_helper_neon_acgt_f32, + gen_helper_neon_acgt_f64, +}; +TRANS(FACGT_s, do_fp3_scalar, a, &f_scalar_facgt) + +static void gen_fabd_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subh(d, n, m, s); + gen_vfp_absh(d, d); +} + +static void gen_fabd_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s) +{ + gen_helper_vfp_subs(d, n, m, s); + gen_vfp_abss(d, d); +} + +static void gen_fabd_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s) +{ + gen_helper_vfp_subd(d, n, m, s); + gen_vfp_absd(d, d); +} + +static const FPScalar f_scalar_fabd = { + gen_fabd_h, + gen_fabd_s, + gen_fabd_d, +}; +TRANS(FABD_s, do_fp3_scalar, a, &f_scalar_fabd) + +static const FPScalar f_scalar_frecps = { + gen_helper_recpsf_f16, + gen_helper_recpsf_f32, + gen_helper_recpsf_f64, +}; +TRANS(FRECPS_s, do_fp3_scalar, a, &f_scalar_frecps) + +static const FPScalar f_scalar_frsqrts = { + gen_helper_rsqrtsf_f16, + gen_helper_rsqrtsf_f32, + gen_helper_rsqrtsf_f64, +}; +TRANS(FRSQRTS_s, do_fp3_scalar, a, &f_scalar_frsqrts) + +static bool do_satacc_s(DisasContext *s, arg_rrr_e *a, + MemOp sgn_n, MemOp sgn_m, + void (*gen_bhs)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp), + void (*gen_d)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64)) +{ + TCGv_i64 t0, t1, t2, qc; + MemOp esz = a->esz; + + if (!fp_access_check(s)) { + return true; + } + + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + qc = tcg_temp_new_i64(); + read_vec_element(s, t1, a->rn, 0, esz | sgn_n); + read_vec_element(s, t2, a->rm, 0, esz | sgn_m); + tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); + + if (esz == MO_64) { + gen_d(t0, qc, t1, t2); + } else { + gen_bhs(t0, qc, t1, t2, esz); + tcg_gen_ext_i64(t0, t0, esz); + } + + write_fp_dreg(s, a->rd, t0); + tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc)); + return true; +} + +TRANS(SQADD_s, do_satacc_s, a, MO_SIGN, MO_SIGN, gen_sqadd_bhs, gen_sqadd_d) +TRANS(SQSUB_s, do_satacc_s, a, 
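/*
 * Illustrative sketch (invented names, not the QEMU helpers) of the
 * saturating-accumulate semantics that do_satacc_s above feeds into
 * gen_sqadd_bhs and friends: a signed add that clamps to the element
 * range and records the saturation in a sticky Q flag.
 */
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

static int8_t sqadd8_sketch(int8_t a, int8_t b, bool *q)
{
    int sum = (int)a + (int)b;

    if (sum > INT8_MAX) { *q = true; return INT8_MAX; }
    if (sum < INT8_MIN) { *q = true; return INT8_MIN; }
    return (int8_t)sum;
}

int main(void)
{
    bool q = false;
    printf("%d q=%d\n", sqadd8_sketch(100, 100, &q), q);   /* 127 q=1 */
    return 0;
}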
MO_SIGN, MO_SIGN, gen_sqsub_bhs, gen_sqsub_d) +TRANS(UQADD_s, do_satacc_s, a, 0, 0, gen_uqadd_bhs, gen_uqadd_d) +TRANS(UQSUB_s, do_satacc_s, a, 0, 0, gen_uqsub_bhs, gen_uqsub_d) +TRANS(SUQADD_s, do_satacc_s, a, MO_SIGN, 0, gen_suqadd_bhs, gen_suqadd_d) +TRANS(USQADD_s, do_satacc_s, a, 0, MO_SIGN, gen_usqadd_bhs, gen_usqadd_d) + +static bool do_int3_scalar_d(DisasContext *s, arg_rrr_e *a, + void (*fn)(TCGv_i64, TCGv_i64, TCGv_i64)) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rm, 0, MO_64); + fn(t0, t0, t1); + write_fp_dreg(s, a->rd, t0); + } + return true; +} + +TRANS(SSHL_s, do_int3_scalar_d, a, gen_sshl_i64) +TRANS(USHL_s, do_int3_scalar_d, a, gen_ushl_i64) +TRANS(SRSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_s64) +TRANS(URSHL_s, do_int3_scalar_d, a, gen_helper_neon_rshl_u64) +TRANS(ADD_s, do_int3_scalar_d, a, tcg_gen_add_i64) +TRANS(SUB_s, do_int3_scalar_d, a, tcg_gen_sub_i64) + +typedef struct ENVScalar2 { + NeonGenTwoOpEnvFn *gen_bhs[3]; + NeonGenTwo64OpEnvFn *gen_d; +} ENVScalar2; + +static bool do_env_scalar2(DisasContext *s, arg_rrr_e *a, const ENVScalar2 *f) +{ + if (!fp_access_check(s)) { + return true; + } + if (a->esz == MO_64) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = read_fp_dreg(s, a->rm); + f->gen_d(t0, tcg_env, t0, t1); + write_fp_dreg(s, a->rd, t0); + } else { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, a->esz); + read_vec_element_i32(s, t1, a->rm, 0, a->esz); + f->gen_bhs[a->esz](t0, tcg_env, t0, t1); + write_fp_sreg(s, a->rd, t0); + } + return true; +} + +static const ENVScalar2 f_scalar_sqshl = { + { gen_helper_neon_qshl_s8, + gen_helper_neon_qshl_s16, + gen_helper_neon_qshl_s32 }, + gen_helper_neon_qshl_s64, +}; +TRANS(SQSHL_s, do_env_scalar2, a, &f_scalar_sqshl) + +static const ENVScalar2 f_scalar_uqshl = { + { gen_helper_neon_qshl_u8, + gen_helper_neon_qshl_u16, + gen_helper_neon_qshl_u32 }, + gen_helper_neon_qshl_u64, +}; +TRANS(UQSHL_s, do_env_scalar2, a, &f_scalar_uqshl) + +static const ENVScalar2 f_scalar_sqrshl = { + { gen_helper_neon_qrshl_s8, + gen_helper_neon_qrshl_s16, + gen_helper_neon_qrshl_s32 }, + gen_helper_neon_qrshl_s64, +}; +TRANS(SQRSHL_s, do_env_scalar2, a, &f_scalar_sqrshl) + +static const ENVScalar2 f_scalar_uqrshl = { + { gen_helper_neon_qrshl_u8, + gen_helper_neon_qrshl_u16, + gen_helper_neon_qrshl_u32 }, + gen_helper_neon_qrshl_u64, +}; +TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl) + +static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a, + const ENVScalar2 *f) +{ + if (a->esz == MO_16 || a->esz == MO_32) { + return do_env_scalar2(s, a, f); + } + return false; +} + +static const ENVScalar2 f_scalar_sqdmulh = { + { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 } +}; +TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh) + +static const ENVScalar2 f_scalar_sqrdmulh = { + { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 } +}; +TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh) + +static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = read_fp_dreg(s, a->rm); + tcg_gen_negsetcond_i64(cond, t0, t0, t1); + write_fp_dreg(s, a->rd, t0); + } + return true; +} + +TRANS(CMGT_s, do_cmop_d, a, TCG_COND_GT) +TRANS(CMHI_s, do_cmop_d, a, TCG_COND_GTU) +TRANS(CMGE_s, do_cmop_d, a, 
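/*
 * Illustrative sketch of the "negated setcond" idiom used by do_cmop_d
 * above: AdvSIMD integer compares produce an all-ones element on success
 * and all-zeros otherwise, which is what tcg_gen_negsetcond_i64 emits.
 * Standalone example, names invented.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t cmgt_d_sketch(int64_t n, int64_t m)
{
    return n > m ? UINT64_MAX : 0;
}

int main(void)
{
    printf("0x%016llx 0x%016llx\n",
           (unsigned long long)cmgt_d_sketch(5, 3),
           (unsigned long long)cmgt_d_sketch(3, 5));
    return 0;
}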
TCG_COND_GE) +TRANS(CMHS_s, do_cmop_d, a, TCG_COND_GEU) +TRANS(CMEQ_s, do_cmop_d, a, TCG_COND_EQ) +TRANS(CMTST_s, do_cmop_d, a, TCG_COND_TSTNE) + +static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, + gen_helper_gvec_3_ptr * const fns[3]) +{ + MemOp esz = a->esz; + + switch (esz) { + case MO_64: + if (!a->q) { + return false; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + return false; + } + if (fp_access_check(s)) { + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, + esz == MO_16, 0, fns[esz - 1]); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_fadd[3] = { + gen_helper_gvec_fadd_h, + gen_helper_gvec_fadd_s, + gen_helper_gvec_fadd_d, +}; +TRANS(FADD_v, do_fp3_vector, a, f_vector_fadd) + +static gen_helper_gvec_3_ptr * const f_vector_fsub[3] = { + gen_helper_gvec_fsub_h, + gen_helper_gvec_fsub_s, + gen_helper_gvec_fsub_d, +}; +TRANS(FSUB_v, do_fp3_vector, a, f_vector_fsub) + +static gen_helper_gvec_3_ptr * const f_vector_fdiv[3] = { + gen_helper_gvec_fdiv_h, + gen_helper_gvec_fdiv_s, + gen_helper_gvec_fdiv_d, +}; +TRANS(FDIV_v, do_fp3_vector, a, f_vector_fdiv) + +static gen_helper_gvec_3_ptr * const f_vector_fmul[3] = { + gen_helper_gvec_fmul_h, + gen_helper_gvec_fmul_s, + gen_helper_gvec_fmul_d, +}; +TRANS(FMUL_v, do_fp3_vector, a, f_vector_fmul) + +static gen_helper_gvec_3_ptr * const f_vector_fmax[3] = { + gen_helper_gvec_fmax_h, + gen_helper_gvec_fmax_s, + gen_helper_gvec_fmax_d, +}; +TRANS(FMAX_v, do_fp3_vector, a, f_vector_fmax) + +static gen_helper_gvec_3_ptr * const f_vector_fmin[3] = { + gen_helper_gvec_fmin_h, + gen_helper_gvec_fmin_s, + gen_helper_gvec_fmin_d, +}; +TRANS(FMIN_v, do_fp3_vector, a, f_vector_fmin) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxnm[3] = { + gen_helper_gvec_fmaxnum_h, + gen_helper_gvec_fmaxnum_s, + gen_helper_gvec_fmaxnum_d, +}; +TRANS(FMAXNM_v, do_fp3_vector, a, f_vector_fmaxnm) + +static gen_helper_gvec_3_ptr * const f_vector_fminnm[3] = { + gen_helper_gvec_fminnum_h, + gen_helper_gvec_fminnum_s, + gen_helper_gvec_fminnum_d, +}; +TRANS(FMINNM_v, do_fp3_vector, a, f_vector_fminnm) + +static gen_helper_gvec_3_ptr * const f_vector_fmulx[3] = { + gen_helper_gvec_fmulx_h, + gen_helper_gvec_fmulx_s, + gen_helper_gvec_fmulx_d, +}; +TRANS(FMULX_v, do_fp3_vector, a, f_vector_fmulx) + +static gen_helper_gvec_3_ptr * const f_vector_fmla[3] = { + gen_helper_gvec_vfma_h, + gen_helper_gvec_vfma_s, + gen_helper_gvec_vfma_d, +}; +TRANS(FMLA_v, do_fp3_vector, a, f_vector_fmla) + +static gen_helper_gvec_3_ptr * const f_vector_fmls[3] = { + gen_helper_gvec_vfms_h, + gen_helper_gvec_vfms_s, + gen_helper_gvec_vfms_d, +}; +TRANS(FMLS_v, do_fp3_vector, a, f_vector_fmls) + +static gen_helper_gvec_3_ptr * const f_vector_fcmeq[3] = { + gen_helper_gvec_fceq_h, + gen_helper_gvec_fceq_s, + gen_helper_gvec_fceq_d, +}; +TRANS(FCMEQ_v, do_fp3_vector, a, f_vector_fcmeq) + +static gen_helper_gvec_3_ptr * const f_vector_fcmge[3] = { + gen_helper_gvec_fcge_h, + gen_helper_gvec_fcge_s, + gen_helper_gvec_fcge_d, +}; +TRANS(FCMGE_v, do_fp3_vector, a, f_vector_fcmge) + +static gen_helper_gvec_3_ptr * const f_vector_fcmgt[3] = { + gen_helper_gvec_fcgt_h, + gen_helper_gvec_fcgt_s, + gen_helper_gvec_fcgt_d, +}; +TRANS(FCMGT_v, do_fp3_vector, a, f_vector_fcmgt) + +static gen_helper_gvec_3_ptr * const f_vector_facge[3] = { + gen_helper_gvec_facge_h, + gen_helper_gvec_facge_s, + gen_helper_gvec_facge_d, +}; +TRANS(FACGE_v, do_fp3_vector, a, f_vector_facge) + +static 
gen_helper_gvec_3_ptr * const f_vector_facgt[3] = { + gen_helper_gvec_facgt_h, + gen_helper_gvec_facgt_s, + gen_helper_gvec_facgt_d, +}; +TRANS(FACGT_v, do_fp3_vector, a, f_vector_facgt) + +static gen_helper_gvec_3_ptr * const f_vector_fabd[3] = { + gen_helper_gvec_fabd_h, + gen_helper_gvec_fabd_s, + gen_helper_gvec_fabd_d, +}; +TRANS(FABD_v, do_fp3_vector, a, f_vector_fabd) + +static gen_helper_gvec_3_ptr * const f_vector_frecps[3] = { + gen_helper_gvec_recps_h, + gen_helper_gvec_recps_s, + gen_helper_gvec_recps_d, +}; +TRANS(FRECPS_v, do_fp3_vector, a, f_vector_frecps) + +static gen_helper_gvec_3_ptr * const f_vector_frsqrts[3] = { + gen_helper_gvec_rsqrts_h, + gen_helper_gvec_rsqrts_s, + gen_helper_gvec_rsqrts_d, +}; +TRANS(FRSQRTS_v, do_fp3_vector, a, f_vector_frsqrts) + +static gen_helper_gvec_3_ptr * const f_vector_faddp[3] = { + gen_helper_gvec_faddp_h, + gen_helper_gvec_faddp_s, + gen_helper_gvec_faddp_d, +}; +TRANS(FADDP_v, do_fp3_vector, a, f_vector_faddp) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxp[3] = { + gen_helper_gvec_fmaxp_h, + gen_helper_gvec_fmaxp_s, + gen_helper_gvec_fmaxp_d, +}; +TRANS(FMAXP_v, do_fp3_vector, a, f_vector_fmaxp) + +static gen_helper_gvec_3_ptr * const f_vector_fminp[3] = { + gen_helper_gvec_fminp_h, + gen_helper_gvec_fminp_s, + gen_helper_gvec_fminp_d, +}; +TRANS(FMINP_v, do_fp3_vector, a, f_vector_fminp) + +static gen_helper_gvec_3_ptr * const f_vector_fmaxnmp[3] = { + gen_helper_gvec_fmaxnump_h, + gen_helper_gvec_fmaxnump_s, + gen_helper_gvec_fmaxnump_d, +}; +TRANS(FMAXNMP_v, do_fp3_vector, a, f_vector_fmaxnmp) + +static gen_helper_gvec_3_ptr * const f_vector_fminnmp[3] = { + gen_helper_gvec_fminnump_h, + gen_helper_gvec_fminnump_s, + gen_helper_gvec_fminnump_d, +}; +TRANS(FMINNMP_v, do_fp3_vector, a, f_vector_fminnmp) + +static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2) +{ + if (fp_access_check(s)) { + int data = (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), tcg_env, + a->q ? 
16 : 8, vec_full_reg_size(s), + data, gen_helper_gvec_fmlal_a64); + } + return true; +} + +TRANS_FEAT(FMLAL_v, aa64_fhm, do_fmlal, a, false, false) +TRANS_FEAT(FMLSL_v, aa64_fhm, do_fmlal, a, true, false) +TRANS_FEAT(FMLAL2_v, aa64_fhm, do_fmlal, a, false, true) +TRANS_FEAT(FMLSL2_v, aa64_fhm, do_fmlal, a, true, true) + +TRANS(ADDP_v, do_gvec_fn3, a, gen_gvec_addp) +TRANS(SMAXP_v, do_gvec_fn3_no64, a, gen_gvec_smaxp) +TRANS(SMINP_v, do_gvec_fn3_no64, a, gen_gvec_sminp) +TRANS(UMAXP_v, do_gvec_fn3_no64, a, gen_gvec_umaxp) +TRANS(UMINP_v, do_gvec_fn3_no64, a, gen_gvec_uminp) + +TRANS(AND_v, do_gvec_fn3, a, tcg_gen_gvec_and) +TRANS(BIC_v, do_gvec_fn3, a, tcg_gen_gvec_andc) +TRANS(ORR_v, do_gvec_fn3, a, tcg_gen_gvec_or) +TRANS(ORN_v, do_gvec_fn3, a, tcg_gen_gvec_orc) +TRANS(EOR_v, do_gvec_fn3, a, tcg_gen_gvec_xor) + +static bool do_bitsel(DisasContext *s, bool is_q, int d, int a, int b, int c) +{ + if (fp_access_check(s)) { + gen_gvec_fn4(s, is_q, d, a, b, c, tcg_gen_gvec_bitsel, 0); + } + return true; +} + +TRANS(BSL_v, do_bitsel, a->q, a->rd, a->rd, a->rn, a->rm) +TRANS(BIT_v, do_bitsel, a->q, a->rd, a->rm, a->rn, a->rd) +TRANS(BIF_v, do_bitsel, a->q, a->rd, a->rm, a->rd, a->rn) + +TRANS(SQADD_v, do_gvec_fn3, a, gen_gvec_sqadd_qc) +TRANS(UQADD_v, do_gvec_fn3, a, gen_gvec_uqadd_qc) +TRANS(SQSUB_v, do_gvec_fn3, a, gen_gvec_sqsub_qc) +TRANS(UQSUB_v, do_gvec_fn3, a, gen_gvec_uqsub_qc) +TRANS(SUQADD_v, do_gvec_fn3, a, gen_gvec_suqadd_qc) +TRANS(USQADD_v, do_gvec_fn3, a, gen_gvec_usqadd_qc) + +TRANS(SSHL_v, do_gvec_fn3, a, gen_gvec_sshl) +TRANS(USHL_v, do_gvec_fn3, a, gen_gvec_ushl) +TRANS(SRSHL_v, do_gvec_fn3, a, gen_gvec_srshl) +TRANS(URSHL_v, do_gvec_fn3, a, gen_gvec_urshl) +TRANS(SQSHL_v, do_gvec_fn3, a, gen_neon_sqshl) +TRANS(UQSHL_v, do_gvec_fn3, a, gen_neon_uqshl) +TRANS(SQRSHL_v, do_gvec_fn3, a, gen_neon_sqrshl) +TRANS(UQRSHL_v, do_gvec_fn3, a, gen_neon_uqrshl) + +TRANS(ADD_v, do_gvec_fn3, a, tcg_gen_gvec_add) +TRANS(SUB_v, do_gvec_fn3, a, tcg_gen_gvec_sub) +TRANS(SHADD_v, do_gvec_fn3_no64, a, gen_gvec_shadd) +TRANS(UHADD_v, do_gvec_fn3_no64, a, gen_gvec_uhadd) +TRANS(SHSUB_v, do_gvec_fn3_no64, a, gen_gvec_shsub) +TRANS(UHSUB_v, do_gvec_fn3_no64, a, gen_gvec_uhsub) +TRANS(SRHADD_v, do_gvec_fn3_no64, a, gen_gvec_srhadd) +TRANS(URHADD_v, do_gvec_fn3_no64, a, gen_gvec_urhadd) +TRANS(SMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smax) +TRANS(UMAX_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umax) +TRANS(SMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_smin) +TRANS(UMIN_v, do_gvec_fn3_no64, a, tcg_gen_gvec_umin) +TRANS(SABA_v, do_gvec_fn3_no64, a, gen_gvec_saba) +TRANS(UABA_v, do_gvec_fn3_no64, a, gen_gvec_uaba) +TRANS(SABD_v, do_gvec_fn3_no64, a, gen_gvec_sabd) +TRANS(UABD_v, do_gvec_fn3_no64, a, gen_gvec_uabd) +TRANS(MUL_v, do_gvec_fn3_no64, a, tcg_gen_gvec_mul) +TRANS(PMUL_v, do_gvec_op3_ool, a, 0, gen_helper_gvec_pmul_b) +TRANS(MLA_v, do_gvec_fn3_no64, a, gen_gvec_mla) +TRANS(MLS_v, do_gvec_fn3_no64, a, gen_gvec_mls) + +static bool do_cmop_v(DisasContext *s, arg_qrrr_e *a, TCGCond cond) +{ + if (a->esz == MO_64 && !a->q) { + return false; + } + if (fp_access_check(s)) { + tcg_gen_gvec_cmp(cond, a->esz, + vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + a->q ? 
16 : 8, vec_full_reg_size(s)); + } + return true; +} + +TRANS(CMGT_v, do_cmop_v, a, TCG_COND_GT) +TRANS(CMHI_v, do_cmop_v, a, TCG_COND_GTU) +TRANS(CMGE_v, do_cmop_v, a, TCG_COND_GE) +TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU) +TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ) +TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst) + +TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc) +TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc) + +/* + * Advanced SIMD scalar/vector x indexed element + */ + +static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rn); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t1, a->rm, a->idx, MO_64); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rn); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t1, a->rm, a->idx, MO_32); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rn); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t1, a->rm, a->idx, MO_16); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + g_assert_not_reached(); + } + return true; +} + +TRANS(FMUL_si, do_fp3_scalar_idx, a, &f_scalar_fmul) +TRANS(FMULX_si, do_fp3_scalar_idx, a, &f_scalar_fmulx) + +static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = read_fp_dreg(s, a->rd); + TCGv_i64 t1 = read_fp_dreg(s, a->rn); + TCGv_i64 t2 = tcg_temp_new_i64(); + + read_vec_element(s, t2, a->rm, a->idx, MO_64); + if (neg) { + gen_vfp_negd(t1, t1); + } + gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_sreg(s, a->rd); + TCGv_i32 t1 = read_fp_sreg(s, a->rn); + TCGv_i32 t2 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t2, a->rm, a->idx, MO_32); + if (neg) { + gen_vfp_negs(t1, t1); + } + gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = read_fp_hreg(s, a->rd); + TCGv_i32 t1 = read_fp_hreg(s, a->rn); + TCGv_i32 t2 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t2, a->rm, a->idx, MO_16); + if (neg) { + gen_vfp_negh(t1, t1); + } + gen_helper_advsimd_muladdh(t0, t1, t2, t0, + fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + g_assert_not_reached(); + } + return true; +} + +TRANS(FMLA_si, do_fmla_scalar_idx, a, false) +TRANS(FMLS_si, do_fmla_scalar_idx, a, true) + +static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a, + const ENVScalar2 *f) +{ + if (a->esz < MO_16 || a->esz > MO_32) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, a->esz); + read_vec_element_i32(s, t1, a->rm, a->idx, a->esz); + f->gen_bhs[a->esz](t0, tcg_env, t0, t1); + write_fp_sreg(s, a->rd, t0); + } + return true; +} + +TRANS(SQDMULH_si, do_env_scalar2_idx_hs, 
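/*
 * Illustrative sketch of the by-element (indexed) forms handled by
 * do_fp3_scalar_idx and do_fmla_scalar_idx above: every lane of the first
 * operand is combined with one selected lane of the second. Standalone
 * example with invented names, shown for a 32-bit FMUL.
 */
#include <stdio.h>

static void fmul_by_elem_sketch(float *d, const float *n, const float *m,
                                int lanes, int idx)
{
    for (int i = 0; i < lanes; i++) {
        d[i] = n[i] * m[idx];
    }
}

int main(void)
{
    float n[4] = { 1, 2, 3, 4 }, m[4] = { 10, 20, 30, 40 }, d[4];

    fmul_by_elem_sketch(d, n, m, 4, 2);
    printf("%g %g %g %g\n", d[0], d[1], d[2], d[3]);   /* 30 60 90 120 */
    return 0;
}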
a, &f_scalar_sqdmulh) +TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh) + +static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_3_ptr * const fns[3]) +{ + MemOp esz = a->esz; + + switch (esz) { + case MO_64: + if (!a->q) { + return false; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + g_assert_not_reached(); + } + if (fp_access_check(s)) { + gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm, + esz == MO_16, a->idx, fns[esz - 1]); + } + return true; +} + +static gen_helper_gvec_3_ptr * const f_vector_idx_fmul[3] = { + gen_helper_gvec_fmul_idx_h, + gen_helper_gvec_fmul_idx_s, + gen_helper_gvec_fmul_idx_d, +}; +TRANS(FMUL_vi, do_fp3_vector_idx, a, f_vector_idx_fmul) + +static gen_helper_gvec_3_ptr * const f_vector_idx_fmulx[3] = { + gen_helper_gvec_fmulx_idx_h, + gen_helper_gvec_fmulx_idx_s, + gen_helper_gvec_fmulx_idx_d, +}; +TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx) + +static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg) +{ + static gen_helper_gvec_4_ptr * const fns[3] = { + gen_helper_gvec_fmla_idx_h, + gen_helper_gvec_fmla_idx_s, + gen_helper_gvec_fmla_idx_d, + }; + MemOp esz = a->esz; + + switch (esz) { + case MO_64: + if (!a->q) { + return false; + } + break; + case MO_32: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + g_assert_not_reached(); + } + if (fp_access_check(s)) { + gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd, + esz == MO_16, (a->idx << 1) | neg, + fns[esz - 1]); + } + return true; +} + +TRANS(FMLA_vi, do_fmla_vector_idx, a, false) +TRANS(FMLS_vi, do_fmla_vector_idx, a, true) + +static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2) +{ + if (fp_access_check(s)) { + int data = (a->idx << 2) | (is_2 << 1) | is_s; + tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), tcg_env, + a->q ? 
16 : 8, vec_full_reg_size(s), + data, gen_helper_gvec_fmlal_idx_a64); + } + return true; +} + +TRANS_FEAT(FMLAL_vi, aa64_fhm, do_fmlal_idx, a, false, false) +TRANS_FEAT(FMLSL_vi, aa64_fhm, do_fmlal_idx, a, true, false) +TRANS_FEAT(FMLAL2_vi, aa64_fhm, do_fmlal_idx, a, false, true) +TRANS_FEAT(FMLSL2_vi, aa64_fhm, do_fmlal_idx, a, true, true) + +static bool do_int3_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_3 * const fns[2]) +{ + assert(a->esz == MO_16 || a->esz == MO_32); + if (fp_access_check(s)) { + gen_gvec_op3_ool(s, a->q, a->rd, a->rn, a->rm, a->idx, fns[a->esz - 1]); + } + return true; +} + +static gen_helper_gvec_3 * const f_vector_idx_mul[2] = { + gen_helper_gvec_mul_idx_h, + gen_helper_gvec_mul_idx_s, +}; +TRANS(MUL_vi, do_int3_vector_idx, a, f_vector_idx_mul) + +static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub) +{ + static gen_helper_gvec_4 * const fns[2][2] = { + { gen_helper_gvec_mla_idx_h, gen_helper_gvec_mls_idx_h }, + { gen_helper_gvec_mla_idx_s, gen_helper_gvec_mls_idx_s }, + }; + + assert(a->esz == MO_16 || a->esz == MO_32); + if (fp_access_check(s)) { + gen_gvec_op4_ool(s, a->q, a->rd, a->rn, a->rm, a->rd, + a->idx, fns[a->esz - 1][sub]); + } + return true; +} + +TRANS(MLA_vi, do_mla_vector_idx, a, false) +TRANS(MLS_vi, do_mla_vector_idx, a, true) + +static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a, + gen_helper_gvec_4 * const fns[2]) +{ + assert(a->esz == MO_16 || a->esz == MO_32); + if (fp_access_check(s)) { + tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + offsetof(CPUARMState, vfp.qc), + a->q ? 16 : 8, vec_full_reg_size(s), + a->idx, fns[a->esz - 1]); + } + return true; +} + +static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = { + gen_helper_neon_sqdmulh_idx_h, + gen_helper_neon_sqdmulh_idx_s, +}; +TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh) + +static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = { + gen_helper_neon_sqrdmulh_idx_h, + gen_helper_neon_sqrdmulh_idx_s, +}; +TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh) + +/* + * Advanced SIMD scalar pairwise + */ + +static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rn, 1, MO_64); + f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_dreg(s, a->rd, t0); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, MO_32); + read_vec_element_i32(s, t1, a->rn, 1, MO_32); + f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR)); + write_fp_sreg(s, a->rd, t0); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + + read_vec_element_i32(s, t0, a->rn, 0, MO_16); + read_vec_element_i32(s, t1, a->rn, 1, MO_16); + f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16)); + write_fp_sreg(s, a->rd, t0); + } + break; + default: + g_assert_not_reached(); + } + return true; +} + +TRANS(FADDP_s, do_fp3_scalar_pair, a, &f_scalar_fadd) +TRANS(FMAXP_s, do_fp3_scalar_pair, a, &f_scalar_fmax) +TRANS(FMINP_s, do_fp3_scalar_pair, a, &f_scalar_fmin) +TRANS(FMAXNMP_s, do_fp3_scalar_pair, a, 
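/*
 * Illustrative sketch of the scalar pairwise reductions handled by
 * do_fp3_scalar_pair and trans_ADDP_s above: the two elements of the
 * source register are combined into one scalar result. Standalone example
 * with invented names; NaN and signed-zero handling of the real FMAXP is
 * ignored here.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t addp_scalar_sketch(const uint64_t v[2])
{
    return v[0] + v[1];                 /* ADDP (scalar): element 0 + element 1 */
}

static double fmaxp_scalar_sketch(const double v[2])
{
    return v[0] > v[1] ? v[0] : v[1];   /* FMAXP-style: larger of the pair */
}

int main(void)
{
    uint64_t a[2] = { 3, 4 };
    double   b[2] = { 1.5, -2.0 };

    printf("%llu %g\n", (unsigned long long)addp_scalar_sketch(a),
           fmaxp_scalar_sketch(b));     /* 7 1.5 */
    return 0;
}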
&f_scalar_fmaxnm) +TRANS(FMINNMP_s, do_fp3_scalar_pair, a, &f_scalar_fminnm) + +static bool trans_ADDP_s(DisasContext *s, arg_rr_e *a) +{ + if (fp_access_check(s)) { + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + read_vec_element(s, t0, a->rn, 0, MO_64); + read_vec_element(s, t1, a->rn, 1, MO_64); + tcg_gen_add_i64(t0, t0, t1); + write_fp_dreg(s, a->rd, t0); + } + return true; +} + +/* + * Floating-point conditional select + */ + +static bool trans_FCSEL(DisasContext *s, arg_FCSEL *a) +{ + TCGv_i64 t_true, t_false; + DisasCompare64 c; + + switch (a->esz) { + case MO_32: + case MO_64: + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + break; + default: + return false; + } + + if (!fp_access_check(s)) { + return true; + } + + /* Zero extend sreg & hreg inputs to 64 bits now. */ + t_true = tcg_temp_new_i64(); + t_false = tcg_temp_new_i64(); + read_vec_element(s, t_true, a->rn, 0, a->esz); + read_vec_element(s, t_false, a->rm, 0, a->esz); + + a64_test_cc(&c, a->cond); + tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), + t_true, t_false); + + /* + * Note that sregs & hregs write back zeros to the high bits, + * and we've already done the zero-extension. + */ + write_fp_dreg(s, a->rd, t_true); + return true; +} + +/* + * Floating-point data-processing (3 source) + */ + +static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n) +{ + TCGv_ptr fpst; + + /* + * These are fused multiply-add. Note that doing the negations here + * as separate steps is correct: an input NaN should come out with + * its sign bit flipped if it is a negated-input. + */ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 tn = read_fp_dreg(s, a->rn); + TCGv_i64 tm = read_fp_dreg(s, a->rm); + TCGv_i64 ta = read_fp_dreg(s, a->ra); + + if (neg_a) { + gen_vfp_negd(ta, ta); + } + if (neg_n) { + gen_vfp_negd(tn, tn); + } + fpst = fpstatus_ptr(FPST_FPCR); + gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst); + write_fp_dreg(s, a->rd, ta); + } + break; + + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 tn = read_fp_sreg(s, a->rn); + TCGv_i32 tm = read_fp_sreg(s, a->rm); + TCGv_i32 ta = read_fp_sreg(s, a->ra); + + if (neg_a) { + gen_vfp_negs(ta, ta); + } + if (neg_n) { + gen_vfp_negs(tn, tn); + } + fpst = fpstatus_ptr(FPST_FPCR); + gen_helper_vfp_muladds(ta, tn, tm, ta, fpst); + write_fp_sreg(s, a->rd, ta); + } + break; + + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 tn = read_fp_hreg(s, a->rn); + TCGv_i32 tm = read_fp_hreg(s, a->rm); + TCGv_i32 ta = read_fp_hreg(s, a->ra); + + if (neg_a) { + gen_vfp_negh(ta, ta); + } + if (neg_n) { + gen_vfp_negh(tn, tn); + } + fpst = fpstatus_ptr(FPST_FPCR_F16); + gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst); + write_fp_sreg(s, a->rd, ta); + } + break; + + default: + return false; + } + return true; +} + +TRANS(FMADD, do_fmadd, a, false, false) +TRANS(FNMADD, do_fmadd, a, true, true) +TRANS(FMSUB, do_fmadd, a, false, true) +TRANS(FNMSUB, do_fmadd, a, true, false) + /* Shift a TCGv src by TCGv shift_amount, put result in dst. 
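/*
 * Illustrative sketch of the FMADD family handled by do_fmadd above,
 * written with C99 fma() so the multiply and add stay fused: FMSUB negates
 * Rn, FNMADD negates both Rn and Ra, FNMSUB negates only Ra, exactly as the
 * neg_n/neg_a flags do in the translator. Names invented for the example.
 */
#include <math.h>
#include <stdio.h>

static double fmadd_sketch(double n, double m, double a, int neg_a, int neg_n)
{
    return fma(neg_n ? -n : n, m, neg_a ? -a : a);
}

int main(void)   /* link with -lm */
{
    printf("FMADD:  %g\n", fmadd_sketch(2.0, 3.0, 10.0, 0, 0));   /*  16 */
    printf("FMSUB:  %g\n", fmadd_sketch(2.0, 3.0, 10.0, 0, 1));   /*   4 */
    printf("FNMADD: %g\n", fmadd_sketch(2.0, 3.0, 10.0, 1, 1));   /* -16 */
    printf("FNMSUB: %g\n", fmadd_sketch(2.0, 3.0, 10.0, 1, 0));   /*  -4 */
    return 0;
}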
* Note that it is the caller's responsibility to ensure that the * shift amount is in range (ie 0..31 or 0..63) and provide the ARM @@ -5945,68 +7417,6 @@ static void disas_fp_ccomp(DisasContext *s, uint32_t insn) } } -/* Floating point conditional select - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+------+-----+------+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | cond | 1 1 | Rn | Rd | - * +---+---+---+-----------+------+---+------+------+-----+------+------+ - */ -static void disas_fp_csel(DisasContext *s, uint32_t insn) -{ - unsigned int mos, type, rm, cond, rn, rd; - TCGv_i64 t_true, t_false; - DisasCompare64 c; - MemOp sz; - - mos = extract32(insn, 29, 3); - type = extract32(insn, 22, 2); - rm = extract32(insn, 16, 5); - cond = extract32(insn, 12, 4); - rn = extract32(insn, 5, 5); - rd = extract32(insn, 0, 5); - - if (mos) { - unallocated_encoding(s); - return; - } - - switch (type) { - case 0: - sz = MO_32; - break; - case 1: - sz = MO_64; - break; - case 3: - sz = MO_16; - if (dc_isar_feature(aa64_fp16, s)) { - break; - } - /* fallthru */ - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - /* Zero extend sreg & hreg inputs to 64 bits now. */ - t_true = tcg_temp_new_i64(); - t_false = tcg_temp_new_i64(); - read_vec_element(s, t_true, rn, 0, sz); - read_vec_element(s, t_false, rm, 0, sz); - - a64_test_cc(&c, cond); - tcg_gen_movcond_i64(c.cond, t_true, c.value, tcg_constant_i64(0), - t_true, t_false); - - /* Note that sregs & hregs write back zeros to the high bits, - and we've already done the zero-extension. */ - write_fp_dreg(s, rd, t_true); -} - /* Floating-point data-processing (1 source) - half precision */ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) { @@ -6019,10 +7429,10 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) tcg_gen_mov_i32(tcg_res, tcg_op); break; case 0x1: /* FABS */ - tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff); + gen_vfp_absh(tcg_res, tcg_op); break; case 0x2: /* FNEG */ - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); + gen_vfp_negh(tcg_res, tcg_op); break; case 0x3: /* FSQRT */ fpst = fpstatus_ptr(FPST_FPCR_F16); @@ -6073,10 +7483,10 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) tcg_gen_mov_i32(tcg_res, tcg_op); goto done; case 0x1: /* FABS */ - gen_helper_vfp_abss(tcg_res, tcg_op); + gen_vfp_abss(tcg_res, tcg_op); goto done; case 0x2: /* FNEG */ - gen_helper_vfp_negs(tcg_res, tcg_op); + gen_vfp_negs(tcg_res, tcg_op); goto done; case 0x3: /* FSQRT */ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); @@ -6148,10 +7558,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) switch (opcode) { case 0x1: /* FABS */ - gen_helper_vfp_absd(tcg_res, tcg_op); + gen_vfp_absd(tcg_res, tcg_op); goto done; case 0x2: /* FNEG */ - gen_helper_vfp_negd(tcg_res, tcg_op); + gen_vfp_negd(tcg_res, tcg_op); goto done; case 0x3: /* FSQRT */ gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); @@ -6360,346 +7770,6 @@ static void disas_fp_1src(DisasContext *s, uint32_t insn) } } -/* Floating-point data-processing (2 source) - single precision */ -static void handle_fp_2src_single(DisasContext *s, int opcode, - int rd, int rn, int rm) -{ - TCGv_i32 tcg_op1; - TCGv_i32 tcg_op2; - TCGv_i32 tcg_res; - TCGv_ptr fpst; - - tcg_res = tcg_temp_new_i32(); - fpst = fpstatus_ptr(FPST_FPCR); - tcg_op1 = read_fp_sreg(s, rn); - tcg_op2 = read_fp_sreg(s, rm); - - switch 
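/*
 * Illustrative sketch of what the gen_vfp_absh/gen_vfp_negh calls above
 * replace: for IEEE half precision, FABS just clears the sign bit and FNEG
 * flips it, which is why the old code used andi 0x7fff / xori 0x8000.
 * Standalone example with invented names.
 */
#include <stdint.h>
#include <stdio.h>

static uint16_t fabs_h_sketch(uint16_t h) { return h & 0x7fff; }
static uint16_t fneg_h_sketch(uint16_t h) { return h ^ 0x8000; }

int main(void)
{
    /* 0x3c00 is 1.0 and 0xbc00 is -1.0 in binary16 */
    printf("0x%04x 0x%04x\n", fabs_h_sketch(0xbc00), fneg_h_sketch(0x3c00));
    return 0;
}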
(opcode) { - case 0x0: /* FMUL */ - gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FDIV */ - gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2: /* FADD */ - gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FSUB */ - gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FMAX */ - gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5: /* FMIN */ - gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FMINNM */ - gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FNMUL */ - gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_negs(tcg_res, tcg_res); - break; - } - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (2 source) - double precision */ -static void handle_fp_2src_double(DisasContext *s, int opcode, - int rd, int rn, int rm) -{ - TCGv_i64 tcg_op1; - TCGv_i64 tcg_op2; - TCGv_i64 tcg_res; - TCGv_ptr fpst; - - tcg_res = tcg_temp_new_i64(); - fpst = fpstatus_ptr(FPST_FPCR); - tcg_op1 = read_fp_dreg(s, rn); - tcg_op2 = read_fp_dreg(s, rm); - - switch (opcode) { - case 0x0: /* FMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FDIV */ - gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2: /* FADD */ - gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FSUB */ - gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FMAX */ - gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5: /* FMIN */ - gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FNMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_negd(tcg_res, tcg_res); - break; - } - - write_fp_dreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (2 source) - half precision */ -static void handle_fp_2src_half(DisasContext *s, int opcode, - int rd, int rn, int rm) -{ - TCGv_i32 tcg_op1; - TCGv_i32 tcg_op2; - TCGv_i32 tcg_res; - TCGv_ptr fpst; - - tcg_res = tcg_temp_new_i32(); - fpst = fpstatus_ptr(FPST_FPCR_F16); - tcg_op1 = read_fp_hreg(s, rn); - tcg_op2 = read_fp_hreg(s, rm); - - switch (opcode) { - case 0x0: /* FMUL */ - gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FDIV */ - gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FSUB */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FMAX */ - gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5: /* FMIN */ - gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FNMUL */ - gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); - tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000); - break; - default: - g_assert_not_reached(); - } - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating 
point data-processing (2 source) - * 31 30 29 28 24 23 22 21 20 16 15 12 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+--------+-----+------+------+ - * | M | 0 | S | 1 1 1 1 0 | type | 1 | Rm | opcode | 1 0 | Rn | Rd | - * +---+---+---+-----------+------+---+------+--------+-----+------+------+ - */ -static void disas_fp_2src(DisasContext *s, uint32_t insn) -{ - int mos = extract32(insn, 29, 3); - int type = extract32(insn, 22, 2); - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int opcode = extract32(insn, 12, 4); - - if (opcode > 8 || mos) { - unallocated_encoding(s); - return; - } - - switch (type) { - case 0: - if (!fp_access_check(s)) { - return; - } - handle_fp_2src_single(s, opcode, rd, rn, rm); - break; - case 1: - if (!fp_access_check(s)) { - return; - } - handle_fp_2src_double(s, opcode, rd, rn, rm); - break; - case 3: - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_fp_2src_half(s, opcode, rd, rn, rm); - break; - default: - unallocated_encoding(s); - } -} - -/* Floating-point data-processing (3 source) - single precision */ -static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) -{ - TCGv_i32 tcg_op1, tcg_op2, tcg_op3; - TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - - tcg_op1 = read_fp_sreg(s, rn); - tcg_op2 = read_fp_sreg(s, rm); - tcg_op3 = read_fp_sreg(s, ra); - - /* These are fused multiply-add, and must be done as one - * floating point operation with no rounding between the - * multiplication and addition steps. - * NB that doing the negations here as separate steps is - * correct : an input NaN should come out with its sign bit - * flipped if it is a negated-input. - */ - if (o1 == true) { - gen_helper_vfp_negs(tcg_op3, tcg_op3); - } - - if (o0 != o1) { - gen_helper_vfp_negs(tcg_op1, tcg_op1); - } - - gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (3 source) - double precision */ -static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) -{ - TCGv_i64 tcg_op1, tcg_op2, tcg_op3; - TCGv_i64 tcg_res = tcg_temp_new_i64(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - - tcg_op1 = read_fp_dreg(s, rn); - tcg_op2 = read_fp_dreg(s, rm); - tcg_op3 = read_fp_dreg(s, ra); - - /* These are fused multiply-add, and must be done as one - * floating point operation with no rounding between the - * multiplication and addition steps. - * NB that doing the negations here as separate steps is - * correct : an input NaN should come out with its sign bit - * flipped if it is a negated-input. 
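/*
 * Illustrative sketch of the field extraction the removed disas_fp_2src()
 * above performed by hand, and which the decodetree conversion now
 * generates: each operand is a fixed bit-field of the instruction word.
 * Standalone example; the sample encoding is FADD D0, D1, D0.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned extract_bits(uint32_t insn, unsigned pos, unsigned len)
{
    return (insn >> pos) & ((1u << len) - 1);
}

int main(void)
{
    uint32_t insn = 0x1e602820;     /* FADD D0, D1, D0 */

    printf("type=%u opcode=%u rm=%u rn=%u rd=%u\n",
           extract_bits(insn, 22, 2),   /* 1: double precision */
           extract_bits(insn, 12, 4),   /* 2: FADD */
           extract_bits(insn, 16, 5),
           extract_bits(insn, 5, 5),
           extract_bits(insn, 0, 5));
    return 0;
}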
- */ - if (o1 == true) { - gen_helper_vfp_negd(tcg_op3, tcg_op3); - } - - if (o0 != o1) { - gen_helper_vfp_negd(tcg_op1, tcg_op1); - } - - gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); - - write_fp_dreg(s, rd, tcg_res); -} - -/* Floating-point data-processing (3 source) - half precision */ -static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1, - int rd, int rn, int rm, int ra) -{ - TCGv_i32 tcg_op1, tcg_op2, tcg_op3; - TCGv_i32 tcg_res = tcg_temp_new_i32(); - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_F16); - - tcg_op1 = read_fp_hreg(s, rn); - tcg_op2 = read_fp_hreg(s, rm); - tcg_op3 = read_fp_hreg(s, ra); - - /* These are fused multiply-add, and must be done as one - * floating point operation with no rounding between the - * multiplication and addition steps. - * NB that doing the negations here as separate steps is - * correct : an input NaN should come out with its sign bit - * flipped if it is a negated-input. - */ - if (o1 == true) { - tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000); - } - - if (o0 != o1) { - tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); - } - - gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst); - - write_fp_sreg(s, rd, tcg_res); -} - -/* Floating point data-processing (3 source) - * 31 30 29 28 24 23 22 21 20 16 15 14 10 9 5 4 0 - * +---+---+---+-----------+------+----+------+----+------+------+------+ - * | M | 0 | S | 1 1 1 1 1 | type | o1 | Rm | o0 | Ra | Rn | Rd | - * +---+---+---+-----------+------+----+------+----+------+------+------+ - */ -static void disas_fp_3src(DisasContext *s, uint32_t insn) -{ - int mos = extract32(insn, 29, 3); - int type = extract32(insn, 22, 2); - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int ra = extract32(insn, 10, 5); - int rm = extract32(insn, 16, 5); - bool o0 = extract32(insn, 15, 1); - bool o1 = extract32(insn, 21, 1); - - if (mos) { - unallocated_encoding(s); - return; - } - - switch (type) { - case 0: - if (!fp_access_check(s)) { - return; - } - handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra); - break; - case 1: - if (!fp_access_check(s)) { - return; - } - handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra); - break; - case 3: - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra); - break; - default: - unallocated_encoding(s); - } -} - /* Floating point immediate * 31 30 29 28 24 23 22 21 20 13 12 10 9 5 4 0 * +---+---+---+-----------+------+---+------------+-------+------+------+ @@ -7144,8 +8214,7 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t insn) static void disas_data_proc_fp(DisasContext *s, uint32_t insn) { if (extract32(insn, 24, 1)) { - /* Floating point data-processing (3 source) */ - disas_fp_3src(s, insn); + unallocated_encoding(s); /* in decodetree */ } else if (extract32(insn, 21, 1) == 0) { /* Floating point to fixed point conversions */ disas_fp_fixed_conv(s, insn); @@ -7157,11 +8226,11 @@ static void disas_data_proc_fp(DisasContext *s, uint32_t insn) break; case 2: /* Floating point data-processing (2 source) */ - disas_fp_2src(s, insn); + unallocated_encoding(s); /* in decodetree */ break; case 3: /* Floating point conditional select */ - disas_fp_csel(s, insn); + unallocated_encoding(s); /* in decodetree */ break; case 0: switch (ctz32(extract32(insn, 12, 4))) { @@ -7619,268 +8688,6 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) write_fp_dreg(s, rd, tcg_res); } -/* DUP (Element, Vector) - 
* - * 31 30 29 21 20 16 15 10 9 5 4 0 - * +---+---+-------------------+--------+-------------+------+------+ - * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | - * +---+---+-------------------+--------+-------------+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - */ -static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn, - int imm5) -{ - int size = ctz32(imm5); - int index; - - if (size > 3 || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - index = imm5 >> (size + 1); - tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd), - vec_reg_offset(s, rn, index, size), - is_q ? 16 : 8, vec_full_reg_size(s)); -} - -/* DUP (element, scalar) - * 31 21 20 16 15 10 9 5 4 0 - * +-----------------------+--------+-------------+------+------+ - * | 0 1 0 1 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 0 1 | Rn | Rd | - * +-----------------------+--------+-------------+------+------+ - */ -static void handle_simd_dupes(DisasContext *s, int rd, int rn, - int imm5) -{ - int size = ctz32(imm5); - int index; - TCGv_i64 tmp; - - if (size > 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - index = imm5 >> (size + 1); - - /* This instruction just extracts the specified element and - * zero-extends it into the bottom of the destination register. - */ - tmp = tcg_temp_new_i64(); - read_vec_element(s, tmp, rn, index, size); - write_fp_dreg(s, rd, tmp); -} - -/* DUP (General) - * - * 31 30 29 21 20 16 15 10 9 5 4 0 - * +---+---+-------------------+--------+-------------+------+------+ - * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 0 1 1 | Rn | Rd | - * +---+---+-------------------+--------+-------------+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - */ -static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn, - int imm5) -{ - int size = ctz32(imm5); - uint32_t dofs, oprsz, maxsz; - - if (size > 3 || ((size == 3) && !is_q)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - dofs = vec_full_reg_offset(s, rd); - oprsz = is_q ? 16 : 8; - maxsz = vec_full_reg_size(s); - - tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn)); -} - -/* INS (Element) - * - * 31 21 20 16 15 14 11 10 9 5 4 0 - * +-----------------------+--------+------------+---+------+------+ - * | 0 1 1 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | - * +-----------------------+--------+------------+---+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - * index: encoded in imm5<4:size+1> - */ -static void handle_simd_inse(DisasContext *s, int rd, int rn, - int imm4, int imm5) -{ - int size = ctz32(imm5); - int src_index, dst_index; - TCGv_i64 tmp; - - if (size > 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - dst_index = extract32(imm5, 1+size, 5); - src_index = extract32(imm4, size, 4); - - tmp = tcg_temp_new_i64(); - - read_vec_element(s, tmp, rn, src_index, size); - write_vec_element(s, tmp, rd, dst_index, size); - - /* INS is considered a 128-bit write for SVE. 
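/*
 * Illustrative sketch of the imm5 decoding used by the removed DUP/INS
 * handlers (function names are hypothetical): the element size is the
 * position of the lowest set bit of imm5, and the element index lives in
 * the bits above it.  Assumes imm5 != 0; imm5 == 0 is an unallocated
 * encoding in the real decoder.
 */
#include <stdint.h>

static inline int imm5_size(uint32_t imm5)
{
    return __builtin_ctz(imm5);              /* 0 = B, 1 = H, 2 = S, 3 = D */
}

static inline int imm5_index(uint32_t imm5)
{
    return imm5 >> (imm5_size(imm5) + 1);    /* element index within the vector */
}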
*/ - clear_vec_high(s, true, rd); -} - - -/* INS (General) - * - * 31 21 20 16 15 10 9 5 4 0 - * +-----------------------+--------+-------------+------+------+ - * | 0 1 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 0 1 1 1 | Rn | Rd | - * +-----------------------+--------+-------------+------+------+ - * - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - * index: encoded in imm5<4:size+1> - */ -static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5) -{ - int size = ctz32(imm5); - int idx; - - if (size > 3) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - idx = extract32(imm5, 1 + size, 4 - size); - write_vec_element(s, cpu_reg(s, rn), rd, idx, size); - - /* INS is considered a 128-bit write for SVE. */ - clear_vec_high(s, true, rd); -} - -/* - * UMOV (General) - * SMOV (General) - * - * 31 30 29 21 20 16 15 12 10 9 5 4 0 - * +---+---+-------------------+--------+-------------+------+------+ - * | 0 | Q | 0 0 1 1 1 0 0 0 0 | imm5 | 0 0 1 U 1 1 | Rn | Rd | - * +---+---+-------------------+--------+-------------+------+------+ - * - * U: unsigned when set - * size: encoded in imm5 (see ARM ARM LowestSetBit()) - */ -static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed, - int rn, int rd, int imm5) -{ - int size = ctz32(imm5); - int element; - TCGv_i64 tcg_rd; - - /* Check for UnallocatedEncodings */ - if (is_signed) { - if (size > 2 || (size == 2 && !is_q)) { - unallocated_encoding(s); - return; - } - } else { - if (size > 3 - || (size < 3 && is_q) - || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - } - - if (!fp_access_check(s)) { - return; - } - - element = extract32(imm5, 1+size, 4); - - tcg_rd = cpu_reg(s, rd); - read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? 
MO_SIGN : 0)); - if (is_signed && !is_q) { - tcg_gen_ext32u_i64(tcg_rd, tcg_rd); - } -} - -/* AdvSIMD copy - * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 - * +---+---+----+-----------------+------+---+------+---+------+------+ - * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | - * +---+---+----+-----------------+------+---+------+---+------+------+ - */ -static void disas_simd_copy(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int imm4 = extract32(insn, 11, 4); - int op = extract32(insn, 29, 1); - int is_q = extract32(insn, 30, 1); - int imm5 = extract32(insn, 16, 5); - - if (op) { - if (is_q) { - /* INS (element) */ - handle_simd_inse(s, rd, rn, imm4, imm5); - } else { - unallocated_encoding(s); - } - } else { - switch (imm4) { - case 0: - /* DUP (element - vector) */ - handle_simd_dupe(s, is_q, rd, rn, imm5); - break; - case 1: - /* DUP (general) */ - handle_simd_dupg(s, is_q, rd, rn, imm5); - break; - case 3: - if (is_q) { - /* INS (general) */ - handle_simd_insg(s, rd, rn, imm5); - } else { - unallocated_encoding(s); - } - break; - case 5: - case 7: - /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */ - handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5); - break; - default: - unallocated_encoding(s); - break; - } - } -} - /* AdvSIMD modified immediate * 31 30 29 28 19 18 16 15 12 11 10 9 5 4 0 * +---+---+----+---------------------+-----+-------+----+---+-------+------+ @@ -7905,27 +8712,31 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) bool is_q = extract32(insn, 30, 1); uint64_t imm = 0; - if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) { - /* Check for FMOV (vector, immediate) - half-precision */ - if (!(dc_isar_feature(aa64_fp16, s) && o2 && cmode == 0xf)) { + if (o2) { + if (cmode != 0xf || is_neg) { unallocated_encoding(s); return; } - } - - if (!fp_access_check(s)) { - return; - } - - if (cmode == 15 && o2 && !is_neg) { /* FMOV (vector, immediate) - half-precision */ + if (!dc_isar_feature(aa64_fp16, s)) { + unallocated_encoding(s); + return; + } imm = vfp_expand_imm(MO_16, abcdefgh); /* now duplicate across the lanes */ imm = dup_const(MO_16, imm); } else { + if (cmode == 0xf && is_neg && !is_q) { + unallocated_encoding(s); + return; + } imm = asimd_imm_const(abcdefgh, cmode, is_neg); } + if (!fp_access_check(s)) { + return; + } + if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) { /* MOVI or MVNI, with MVNI negation handled above. */ tcg_gen_gvec_dup_imm(MO_64, vec_full_reg_offset(s, rd), is_q ? 
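/*
 * Illustrative sketch of the duplication step behind dup_const(MO_16, imm)
 * in the reworked FMOV (vector, immediate) half-precision path: the expanded
 * 16-bit immediate is replicated into every halfword of a 64-bit value,
 * which the gvec code then broadcasts across the register.  The helper name
 * is hypothetical.
 */
#include <stdint.h>

static inline uint64_t dup16(uint16_t x)
{
    return (uint64_t)x * 0x0001000100010001ull;  /* x repeated in all four halfwords */
}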
16 : 8, @@ -7940,176 +8751,6 @@ static void disas_simd_mod_imm(DisasContext *s, uint32_t insn) } } -/* AdvSIMD scalar copy - * 31 30 29 28 21 20 16 15 14 11 10 9 5 4 0 - * +-----+----+-----------------+------+---+------+---+------+------+ - * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 | Rn | Rd | - * +-----+----+-----------------+------+---+------+---+------+------+ - */ -static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int imm4 = extract32(insn, 11, 4); - int imm5 = extract32(insn, 16, 5); - int op = extract32(insn, 29, 1); - - if (op != 0 || imm4 != 0) { - unallocated_encoding(s); - return; - } - - /* DUP (element, scalar) */ - handle_simd_dupes(s, rd, rn, imm5); -} - -/* AdvSIMD scalar pairwise - * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----+---+-----------+------+-----------+--------+-----+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 | Rn | Rd | - * +-----+---+-----------+------+-----------+--------+-----+------+------+ - */ -static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn) -{ - int u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - TCGv_ptr fpst; - - /* For some ops (the FP ones), size[1] is part of the encoding. - * For ADDP strictly it is not but size[1] is always 1 for valid - * encodings. - */ - opcode |= (extract32(size, 1, 1) << 5); - - switch (opcode) { - case 0x3b: /* ADDP */ - if (u || size != 3) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - - fpst = NULL; - break; - case 0xc: /* FMAXNMP */ - case 0xd: /* FADDP */ - case 0xf: /* FMAXP */ - case 0x2c: /* FMINNMP */ - case 0x2f: /* FMINP */ - /* FP op, size[0] is 32 or 64 bit*/ - if (!u) { - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } else { - size = MO_16; - } - } else { - size = extract32(size, 0, 1) ? MO_64 : MO_32; - } - - if (!fp_access_check(s)) { - return; - } - - fpst = fpstatus_ptr(size == MO_16 ? 
FPST_FPCR_F16 : FPST_FPCR); - break; - default: - unallocated_encoding(s); - return; - } - - if (size == MO_64) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, 0, MO_64); - read_vec_element(s, tcg_op2, rn, 1, MO_64); - - switch (opcode) { - case 0x3b: /* ADDP */ - tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2); - break; - case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xd: /* FADDP */ - gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FMAXP */ - gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2f: /* FMINP */ - gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_fp_dreg(s, rd, tcg_res); - } else { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op1, rn, 0, size); - read_vec_element_i32(s, tcg_op2, rn, 1, size); - - if (size == MO_16) { - switch (opcode) { - case 0xc: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xd: /* FADDP */ - gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2c: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2f: /* FMINP */ - gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } else { - switch (opcode) { - case 0xc: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xd: /* FADDP */ - gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FMAXP */ - gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2c: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x2f: /* FMINP */ - gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } - - write_fp_sreg(s, rd, tcg_res); - } -} - /* * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate) * @@ -8708,6 +9349,9 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, read_vec_element_i32(s, tcg_op, rn, pass, size); fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); if (is_scalar) { + if (size == MO_16 && !is_u) { + tcg_gen_ext16u_i32(tcg_op, tcg_op); + } write_fp_sreg(s, rd, tcg_op); } else { write_vec_element_i32(s, tcg_op, rd, pass, size); @@ -8892,512 +9536,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn) } } -static void handle_3same_64(DisasContext *s, int opcode, bool u, - TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm) -{ - /* Handle 64x64->64 opcodes which are shared between the scalar - * and vector 3-same groups. We cover every opcode where size == 3 - * is valid in either the three-reg-same (integer, not pairwise) - * or scalar-three-reg-same groups. 
- */ - TCGCond cond; - - switch (opcode) { - case 0x1: /* SQADD */ - if (u) { - gen_helper_neon_qadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0x5: /* SQSUB */ - if (u) { - gen_helper_neon_qsub_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qsub_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0x6: /* CMGT, CMHI */ - cond = u ? TCG_COND_GTU : TCG_COND_GT; - do_cmop: - /* 64 bit integer comparison, result = test ? -1 : 0. */ - tcg_gen_negsetcond_i64(cond, tcg_rd, tcg_rn, tcg_rm); - break; - case 0x7: /* CMGE, CMHS */ - cond = u ? TCG_COND_GEU : TCG_COND_GE; - goto do_cmop; - case 0x11: /* CMTST, CMEQ */ - if (u) { - cond = TCG_COND_EQ; - goto do_cmop; - } - gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm); - break; - case 0x8: /* SSHL, USHL */ - if (u) { - gen_ushl_i64(tcg_rd, tcg_rn, tcg_rm); - } else { - gen_sshl_i64(tcg_rd, tcg_rn, tcg_rm); - } - break; - case 0x9: /* SQSHL, UQSHL */ - if (u) { - gen_helper_neon_qshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0xa: /* SRSHL, URSHL */ - if (u) { - gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm); - } else { - gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm); - } - break; - case 0xb: /* SQRSHL, UQRSHL */ - if (u) { - gen_helper_neon_qrshl_u64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } else { - gen_helper_neon_qrshl_s64(tcg_rd, tcg_env, tcg_rn, tcg_rm); - } - break; - case 0x10: /* ADD, SUB */ - if (u) { - tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm); - } else { - tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm); - } - break; - default: - g_assert_not_reached(); - } -} - -/* Handle the 3-same-operands float operations; shared by the scalar - * and vector encodings. The caller must filter out any encodings - * not allocated for the encoding it is dealing with. 
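/*
 * Illustrative sketch of the comparison semantics the removed
 * handle_3same_64() emits via tcg_gen_negsetcond_i64()/gen_cmtst_i64():
 * AdvSIMD integer compares produce an all-ones element when the condition
 * holds and zero otherwise.  Function names are hypothetical.
 */
#include <stdint.h>

static inline uint64_t cmgt_s64(int64_t a, int64_t b)
{
    return a > b ? ~UINT64_C(0) : 0;     /* CMGT: signed greater-than mask */
}

static inline uint64_t cmtst_u64(uint64_t a, uint64_t b)
{
    return (a & b) ? ~UINT64_C(0) : 0;   /* CMTST: any bit set in common */
}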
- */ -static void handle_3same_float(DisasContext *s, int size, int elements, - int fpopcode, int rd, int rn, int rm) -{ - int pass; - TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR); - - for (pass = 0; pass < elements; pass++) { - if (size) { - /* Double */ - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - switch (fpopcode) { - case 0x39: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - gen_helper_vfp_negd(tcg_op1, tcg_op1); - /* fall through */ - case 0x19: /* FMLA */ - read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, - tcg_res, fpst); - break; - case 0x18: /* FMAXNM */ - gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1a: /* FADD */ - gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1b: /* FMULX */ - gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1e: /* FMAX */ - gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1f: /* FRECPS */ - gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x38: /* FMINNM */ - gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3a: /* FSUB */ - gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3e: /* FMIN */ - gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5b: /* FMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5d: /* FACGE */ - gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5f: /* FDIV */ - gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7a: /* FABD */ - gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_absd(tcg_res, tcg_res); - break; - case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_vec_element(s, tcg_res, rd, pass, MO_64); - } else { - /* Single */ - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); - read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); - - switch (fpopcode) { - case 0x39: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - gen_helper_vfp_negs(tcg_op1, tcg_op1); - /* fall through */ - case 0x19: /* FMLA */ - read_vec_element_i32(s, tcg_res, rd, pass, MO_32); - gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, - tcg_res, fpst); - break; - case 0x1a: /* FADD */ - gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1b: /* FMULX */ - gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1c: /* FCMEQ */ - gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1e: /* FMAX */ - gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1f: /* FRECPS */ - gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 
0x18: /* FMAXNM */ - gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x38: /* FMINNM */ - gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3a: /* FSUB */ - gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3e: /* FMIN */ - gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3f: /* FRSQRTS */ - gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5b: /* FMUL */ - gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5c: /* FCMGE */ - gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5d: /* FACGE */ - gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x5f: /* FDIV */ - gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7a: /* FABD */ - gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst); - gen_helper_vfp_abss(tcg_res, tcg_res); - break; - case 0x7c: /* FCMGT */ - gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7d: /* FACGT */ - gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - if (elements == 1) { - /* scalar single so clear high part */ - TCGv_i64 tcg_tmp = tcg_temp_new_i64(); - - tcg_gen_extu_i32_i64(tcg_tmp, tcg_res); - write_vec_element(s, tcg_tmp, rd, pass, MO_64); - } else { - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - } - } - } - - clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd); -} - -/* AdvSIMD scalar three same - * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 - * +-----+---+-----------+------+---+------+--------+---+------+------+ - * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | - * +-----+---+-----------+------+---+------+--------+---+------+------+ - */ -static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - bool u = extract32(insn, 29, 1); - TCGv_i64 tcg_rd; - - if (opcode >= 0x18) { - /* Floating point: U, size[1] and opcode indicate operation */ - int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6); - switch (fpopcode) { - case 0x1b: /* FMULX */ - case 0x1f: /* FRECPS */ - case 0x3f: /* FRSQRTS */ - case 0x5d: /* FACGE */ - case 0x7d: /* FACGT */ - case 0x1c: /* FCMEQ */ - case 0x5c: /* FCMGE */ - case 0x7c: /* FCMGT */ - case 0x7a: /* FABD */ - break; - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm); - return; - } - - switch (opcode) { - case 0x1: /* SQADD, UQADD */ - case 0x5: /* SQSUB, UQSUB */ - case 0x9: /* SQSHL, UQSHL */ - case 0xb: /* SQRSHL, UQRSHL */ - break; - case 0x8: /* SSHL, USHL */ - case 0xa: /* SRSHL, URSHL */ - case 0x6: /* CMGT, CMHI */ - case 0x7: /* CMGE, CMHS */ - case 0x11: /* CMTST, CMEQ */ - case 0x10: /* ADD, SUB (vector) */ - if (size != 3) { - unallocated_encoding(s); - return; - } - break; - case 0x16: /* SQDMULH, SQRDMULH (vector) */ - if (size != 1 && size != 2) { - unallocated_encoding(s); - return; - } - break; - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - tcg_rd = tcg_temp_new_i64(); - - if (size == 3) { - TCGv_i64 tcg_rn = read_fp_dreg(s, rn); - TCGv_i64 tcg_rm = read_fp_dreg(s, rm); - - handle_3same_64(s, opcode, u, tcg_rd, 
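/*
 * Illustrative sketch of the FABD shape visible in the removed
 * handle_3same_float(): subtract, then clear the sign, i.e. |a - b|.
 * The real helpers go through softfloat so that NaN propagation and FPSR
 * flag setting follow the architecture; this plain-C version (hypothetical
 * name) only shows the arithmetic.
 */
#include <math.h>

static inline double fabd64(double a, double b)
{
    return fabs(a - b);
}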
tcg_rn, tcg_rm); - } else { - /* Do a single operation on the lowest element in the vector. - * We use the standard Neon helpers and rely on 0 OP 0 == 0 with - * no side effects for all these operations. - * OPTME: special-purpose helpers would avoid doing some - * unnecessary work in the helper for the 8 and 16 bit cases. - */ - NeonGenTwoOpEnvFn *genenvfn; - TCGv_i32 tcg_rn = tcg_temp_new_i32(); - TCGv_i32 tcg_rm = tcg_temp_new_i32(); - TCGv_i32 tcg_rd32 = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_rn, rn, 0, size); - read_vec_element_i32(s, tcg_rm, rm, 0, size); - - switch (opcode) { - case 0x1: /* SQADD, UQADD */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 }, - { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 }, - { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0x5: /* SQSUB, UQSUB */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 }, - { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 }, - { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0x9: /* SQSHL, UQSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0xb: /* SQRSHL, UQRSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0x16: /* SQDMULH, SQRDMULH */ - { - static NeonGenTwoOpEnvFn * const fns[2][2] = { - { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 }, - { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 }, - }; - assert(size == 1 || size == 2); - genenvfn = fns[size - 1][u]; - break; - } - default: - g_assert_not_reached(); - } - - genenvfn(tcg_rd32, tcg_env, tcg_rn, tcg_rm); - tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32); - } - - write_fp_dreg(s, rd, tcg_rd); -} - -/* AdvSIMD scalar three same FP16 - * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 - * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ - * | 0 1 | U | 1 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | - * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+ - * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400 - * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400 - */ -static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s, - uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int opcode = extract32(insn, 11, 3); - int rm = extract32(insn, 16, 5); - bool u = extract32(insn, 29, 1); - bool a = extract32(insn, 23, 1); - int fpopcode = opcode | (a << 3) | (u << 4); - TCGv_ptr fpst; - TCGv_i32 tcg_op1; - TCGv_i32 tcg_op2; - TCGv_i32 tcg_res; - - switch (fpopcode) { - case 0x03: /* FMULX */ - case 0x04: /* FCMEQ (reg) */ - case 0x07: /* FRECPS */ - case 0x0f: /* FRSQRTS */ - case 0x14: /* FCMGE (reg) */ - case 0x15: /* FACGE */ - case 0x1a: /* FABD */ - case 0x1c: /* FCMGT (reg) */ - case 0x1d: /* FACGT */ - break; - default: - unallocated_encoding(s); - return; - } - - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - } - - 
if (!fp_access_check(s)) { - return; - } - - fpst = fpstatus_ptr(FPST_FPCR_F16); - - tcg_op1 = read_fp_hreg(s, rn); - tcg_op2 = read_fp_hreg(s, rm); - tcg_res = tcg_temp_new_i32(); - - switch (fpopcode) { - case 0x03: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x04: /* FCMEQ (reg) */ - gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x07: /* FRECPS */ - gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x0f: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x14: /* FCMGE (reg) */ - gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); - break; - case 0x1c: /* FCMGT (reg) */ - gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_fp_sreg(s, rd, tcg_res); -} - /* AdvSIMD scalar three same extra * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 * +-----+---+-----------+------+---+------+---+--------+---+----+----+ @@ -9529,10 +9667,10 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u, } break; case 0x2f: /* FABS */ - gen_helper_vfp_absd(tcg_rd, tcg_rn); + gen_vfp_absd(tcg_rd, tcg_rn); break; case 0x6f: /* FNEG */ - gen_helper_vfp_negd(tcg_rd, tcg_rn); + gen_vfp_negd(tcg_rd, tcg_rn); break; case 0x7f: /* FSQRT */ gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, tcg_env); @@ -9884,88 +10022,6 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, clear_vec_high(s, is_q, rd); } -/* Remaining saturating accumulating ops */ -static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u, - bool is_q, int size, int rn, int rd) -{ - bool is_double = (size == 3); - - if (is_double) { - TCGv_i64 tcg_rn = tcg_temp_new_i64(); - TCGv_i64 tcg_rd = tcg_temp_new_i64(); - int pass; - - for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { - read_vec_element(s, tcg_rn, rn, pass, MO_64); - read_vec_element(s, tcg_rd, rd, pass, MO_64); - - if (is_u) { /* USQADD */ - gen_helper_neon_uqadd_s64(tcg_rd, tcg_env, tcg_rn, tcg_rd); - } else { /* SUQADD */ - gen_helper_neon_sqadd_u64(tcg_rd, tcg_env, tcg_rn, tcg_rd); - } - write_vec_element(s, tcg_rd, rd, pass, MO_64); - } - clear_vec_high(s, !is_scalar, rd); - } else { - TCGv_i32 tcg_rn = tcg_temp_new_i32(); - TCGv_i32 tcg_rd = tcg_temp_new_i32(); - int pass, maxpasses; - - if (is_scalar) { - maxpasses = 1; - } else { - maxpasses = is_q ? 
4 : 2; - } - - for (pass = 0; pass < maxpasses; pass++) { - if (is_scalar) { - read_vec_element_i32(s, tcg_rn, rn, pass, size); - read_vec_element_i32(s, tcg_rd, rd, pass, size); - } else { - read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); - read_vec_element_i32(s, tcg_rd, rd, pass, MO_32); - } - - if (is_u) { /* USQADD */ - switch (size) { - case 0: - gen_helper_neon_uqadd_s8(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 1: - gen_helper_neon_uqadd_s16(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 2: - gen_helper_neon_uqadd_s32(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - default: - g_assert_not_reached(); - } - } else { /* SUQADD */ - switch (size) { - case 0: - gen_helper_neon_sqadd_u8(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 1: - gen_helper_neon_sqadd_u16(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - case 2: - gen_helper_neon_sqadd_u32(tcg_rd, tcg_env, tcg_rn, tcg_rd); - break; - default: - g_assert_not_reached(); - } - } - - if (is_scalar) { - write_vec_element(s, tcg_constant_i64(0), rd, 0, MO_64); - } - write_vec_element_i32(s, tcg_rd, rd, pass, MO_32); - } - clear_vec_high(s, is_q, rd); - } -} - /* AdvSIMD scalar two reg misc * 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0 * +-----+---+-----------+------+-----------+--------+-----+------+------+ @@ -9985,12 +10041,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_ptr tcg_fpstatus; switch (opcode) { - case 0x3: /* USQADD / SUQADD*/ - if (!fp_access_check(s)) { - return; - } - handle_2misc_satacc(s, true, u, false, size, rn, rd); - return; case 0x7: /* SQABS / SQNEG */ break; case 0xa: /* CMLT */ @@ -10090,6 +10140,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } break; default: + case 0x3: /* USQADD / SUQADD */ unallocated_encoding(s); return; } @@ -10822,789 +10873,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } -/* Logic op (opcode == 3) subgroup of C3.6.16. */ -static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) -{ - int rd = extract32(insn, 0, 5); - int rn = extract32(insn, 5, 5); - int rm = extract32(insn, 16, 5); - int size = extract32(insn, 22, 2); - bool is_u = extract32(insn, 29, 1); - bool is_q = extract32(insn, 30, 1); - - if (!fp_access_check(s)) { - return; - } - - switch (size + 4 * is_u) { - case 0: /* AND */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0); - return; - case 1: /* BIC */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0); - return; - case 2: /* ORR */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0); - return; - case 3: /* ORN */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0); - return; - case 4: /* EOR */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0); - return; - - case 5: /* BSL bitwise select */ - gen_gvec_fn4(s, is_q, rd, rd, rn, rm, tcg_gen_gvec_bitsel, 0); - return; - case 6: /* BIT, bitwise insert if true */ - gen_gvec_fn4(s, is_q, rd, rm, rn, rd, tcg_gen_gvec_bitsel, 0); - return; - case 7: /* BIF, bitwise insert if false */ - gen_gvec_fn4(s, is_q, rd, rm, rd, rn, tcg_gen_gvec_bitsel, 0); - return; - - default: - g_assert_not_reached(); - } -} - -/* Pairwise op subgroup of C3.6.16. - * - * This is called directly or via the handle_3same_float for float pairwise - * operations where the opcode and size are calculated differently. 
- */ -static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, - int size, int rn, int rm, int rd) -{ - TCGv_ptr fpst; - int pass; - - /* Floating point operations need fpst */ - if (opcode >= 0x58) { - fpst = fpstatus_ptr(FPST_FPCR); - } else { - fpst = NULL; - } - - if (!fp_access_check(s)) { - return; - } - - /* These operations work on the concatenated rm:rn, with each pair of - * adjacent elements being operated on to produce an element in the result. - */ - if (size == 3) { - TCGv_i64 tcg_res[2]; - - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - int passreg = (pass == 0) ? rn : rm; - - read_vec_element(s, tcg_op1, passreg, 0, MO_64); - read_vec_element(s, tcg_op2, passreg, 1, MO_64); - tcg_res[pass] = tcg_temp_new_i64(); - - switch (opcode) { - case 0x17: /* ADDP */ - tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2); - break; - case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5a: /* FADDP */ - gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5e: /* FMAXP */ - gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x78: /* FMINNMP */ - gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x7e: /* FMINP */ - gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } - - for (pass = 0; pass < 2; pass++) { - write_vec_element(s, tcg_res[pass], rd, pass, MO_64); - } - } else { - int maxpass = is_q ? 4 : 2; - TCGv_i32 tcg_res[4]; - - for (pass = 0; pass < maxpass; pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - NeonGenTwoOpFn *genfn = NULL; - int passreg = pass < (maxpass / 2) ? rn : rm; - int passelt = (is_q && (pass & 1)) ? 
2 : 0; - - read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32); - read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32); - tcg_res[pass] = tcg_temp_new_i32(); - - switch (opcode) { - case 0x17: /* ADDP */ - { - static NeonGenTwoOpFn * const fns[3] = { - gen_helper_neon_padd_u8, - gen_helper_neon_padd_u16, - tcg_gen_add_i32, - }; - genfn = fns[size]; - break; - } - case 0x14: /* SMAXP, UMAXP */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, - { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { tcg_gen_smax_i32, tcg_gen_umax_i32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x15: /* SMINP, UMINP */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, - { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { tcg_gen_smin_i32, tcg_gen_umin_i32 }, - }; - genfn = fns[size][u]; - break; - } - /* The FP operations are all on single floats (32 bit) */ - case 0x58: /* FMAXNMP */ - gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5a: /* FADDP */ - gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x5e: /* FMAXP */ - gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x78: /* FMINNMP */ - gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x7e: /* FMINP */ - gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - /* FP ops called directly, otherwise call now */ - if (genfn) { - genfn(tcg_res[pass], tcg_op1, tcg_op2); - } - } - - for (pass = 0; pass < maxpass; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32); - } - clear_vec_high(s, is_q, rd); - } -} - -/* Floating point op subgroup of C3.6.16. */ -static void disas_simd_3same_float(DisasContext *s, uint32_t insn) -{ - /* For floating point ops, the U, size[1] and opcode bits - * together indicate the operation. size[0] indicates single - * or double. - */ - int fpopcode = extract32(insn, 11, 5) - | (extract32(insn, 23, 1) << 5) - | (extract32(insn, 29, 1) << 6); - int is_q = extract32(insn, 30, 1); - int size = extract32(insn, 22, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - int datasize = is_q ? 128 : 64; - int esize = 32 << size; - int elements = datasize / esize; - - if (size == 1 && !is_q) { - unallocated_encoding(s); - return; - } - - switch (fpopcode) { - case 0x58: /* FMAXNMP */ - case 0x5a: /* FADDP */ - case 0x5e: /* FMAXP */ - case 0x78: /* FMINNMP */ - case 0x7e: /* FMINP */ - if (size && !is_q) { - unallocated_encoding(s); - return; - } - handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? 
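/*
 * Illustrative sketch of the dispatch key built by the removed
 * disas_simd_3same_float(): the 5-bit opcode field is combined with size[1]
 * and U so that one switch can tell all the vector FP three-same operations
 * apart.  The helper name is hypothetical.
 */
#include <stdint.h>

static inline int fp_3same_opcode(uint32_t insn)
{
    return (int)(((insn >> 11) & 0x1f)          /* opcode[4:0] */
                 | (((insn >> 23) & 1) << 5)    /* size[1]     */
                 | (((insn >> 29) & 1) << 6));  /* U           */
}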
MO_64 : MO_32, - rn, rm, rd); - return; - case 0x1b: /* FMULX */ - case 0x1f: /* FRECPS */ - case 0x3f: /* FRSQRTS */ - case 0x5d: /* FACGE */ - case 0x7d: /* FACGT */ - case 0x19: /* FMLA */ - case 0x39: /* FMLS */ - case 0x18: /* FMAXNM */ - case 0x1a: /* FADD */ - case 0x1c: /* FCMEQ */ - case 0x1e: /* FMAX */ - case 0x38: /* FMINNM */ - case 0x3a: /* FSUB */ - case 0x3e: /* FMIN */ - case 0x5b: /* FMUL */ - case 0x5c: /* FCMGE */ - case 0x5f: /* FDIV */ - case 0x7a: /* FABD */ - case 0x7c: /* FCMGT */ - if (!fp_access_check(s)) { - return; - } - handle_3same_float(s, size, elements, fpopcode, rd, rn, rm); - return; - - case 0x1d: /* FMLAL */ - case 0x3d: /* FMLSL */ - case 0x59: /* FMLAL2 */ - case 0x79: /* FMLSL2 */ - if (size & 1 || !dc_isar_feature(aa64_fhm, s)) { - unallocated_encoding(s); - return; - } - if (fp_access_check(s)) { - int is_s = extract32(insn, 23, 1); - int is_2 = extract32(insn, 29, 1); - int data = (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_env, - is_q ? 16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_a64); - } - return; - - default: - unallocated_encoding(s); - return; - } -} - -/* Integer op subgroup of C3.6.16. */ -static void disas_simd_3same_int(DisasContext *s, uint32_t insn) -{ - int is_q = extract32(insn, 30, 1); - int u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 11, 5); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - int pass; - TCGCond cond; - - switch (opcode) { - case 0x13: /* MUL, PMUL */ - if (u && size != 0) { - unallocated_encoding(s); - return; - } - /* fall through */ - case 0x0: /* SHADD, UHADD */ - case 0x2: /* SRHADD, URHADD */ - case 0x4: /* SHSUB, UHSUB */ - case 0xc: /* SMAX, UMAX */ - case 0xd: /* SMIN, UMIN */ - case 0xe: /* SABD, UABD */ - case 0xf: /* SABA, UABA */ - case 0x12: /* MLA, MLS */ - if (size == 3) { - unallocated_encoding(s); - return; - } - break; - case 0x16: /* SQDMULH, SQRDMULH */ - if (size == 0 || size == 3) { - unallocated_encoding(s); - return; - } - break; - default: - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - break; - } - - if (!fp_access_check(s)) { - return; - } - - switch (opcode) { - case 0x01: /* SQADD, UQADD */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqadd_qc, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqadd_qc, size); - } - return; - case 0x05: /* SQSUB, UQSUB */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uqsub_qc, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqsub_qc, size); - } - return; - case 0x08: /* SSHL, USHL */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_ushl, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sshl, size); - } - return; - case 0x0c: /* SMAX, UMAX */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size); - } - return; - case 0x0d: /* SMIN, UMIN */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size); - } - return; - case 0xe: /* SABD, UABD */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uabd, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sabd, size); - } - return; - case 0xf: /* SABA, UABA */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_uaba, 
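/*
 * Illustrative sketch of the unsigned saturating add behind the UQADD gvec
 * expansion above: sums that would overflow clamp to the type maximum (the
 * real helpers additionally set the cumulative QC flag, which this
 * hypothetical byte-sized version omits).
 */
#include <stdint.h>

static inline uint8_t uqadd_u8(uint8_t a, uint8_t b)
{
    unsigned r = (unsigned)a + b;
    return r > UINT8_MAX ? UINT8_MAX : (uint8_t)r;
}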
size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_saba, size); - } - return; - case 0x10: /* ADD, SUB */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size); - } - return; - case 0x13: /* MUL, PMUL */ - if (!u) { /* MUL */ - gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size); - } else { /* PMUL */ - gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0, gen_helper_gvec_pmul_b); - } - return; - case 0x12: /* MLA, MLS */ - if (u) { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mls, size); - } else { - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_mla, size); - } - return; - case 0x16: /* SQDMULH, SQRDMULH */ - { - static gen_helper_gvec_3_ptr * const fns[2][2] = { - { gen_helper_neon_sqdmulh_h, gen_helper_neon_sqrdmulh_h }, - { gen_helper_neon_sqdmulh_s, gen_helper_neon_sqrdmulh_s }, - }; - gen_gvec_op3_qc(s, is_q, rd, rn, rm, fns[size - 1][u]); - } - return; - case 0x11: - if (!u) { /* CMTST */ - gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_cmtst, size); - return; - } - /* else CMEQ */ - cond = TCG_COND_EQ; - goto do_gvec_cmp; - case 0x06: /* CMGT, CMHI */ - cond = u ? TCG_COND_GTU : TCG_COND_GT; - goto do_gvec_cmp; - case 0x07: /* CMGE, CMHS */ - cond = u ? TCG_COND_GEU : TCG_COND_GE; - do_gvec_cmp: - tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s)); - return; - } - - if (size == 3) { - assert(is_q); - for (pass = 0; pass < 2; pass++) { - TCGv_i64 tcg_op1 = tcg_temp_new_i64(); - TCGv_i64 tcg_op2 = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - - handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2); - - write_vec_element(s, tcg_res, rd, pass, MO_64); - } - } else { - for (pass = 0; pass < (is_q ? 
4 : 2); pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - NeonGenTwoOpFn *genfn = NULL; - NeonGenTwoOpEnvFn *genenvfn = NULL; - - read_vec_element_i32(s, tcg_op1, rn, pass, MO_32); - read_vec_element_i32(s, tcg_op2, rm, pass, MO_32); - - switch (opcode) { - case 0x0: /* SHADD, UHADD */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 }, - { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 }, - { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x2: /* SRHADD, URHADD */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 }, - { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 }, - { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x4: /* SHSUB, UHSUB */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 }, - { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 }, - { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0x9: /* SQSHL, UQSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 }, - { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 }, - { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - case 0xa: /* SRSHL, URSHL */ - { - static NeonGenTwoOpFn * const fns[3][2] = { - { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 }, - { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 }, - { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 }, - }; - genfn = fns[size][u]; - break; - } - case 0xb: /* SQRSHL, UQRSHL */ - { - static NeonGenTwoOpEnvFn * const fns[3][2] = { - { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 }, - { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 }, - { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 }, - }; - genenvfn = fns[size][u]; - break; - } - default: - g_assert_not_reached(); - } - - if (genenvfn) { - genenvfn(tcg_res, tcg_env, tcg_op1, tcg_op2); - } else { - genfn(tcg_res, tcg_op1, tcg_op2); - } - - write_vec_element_i32(s, tcg_res, rd, pass, MO_32); - } - } - clear_vec_high(s, is_q, rd); -} - -/* AdvSIMD three same - * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0 - * +---+---+---+-----------+------+---+------+--------+---+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd | - * +---+---+---+-----------+------+---+------+--------+---+------+------+ - */ -static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 11, 5); - - switch (opcode) { - case 0x3: /* logic ops */ - disas_simd_3same_logic(s, insn); - break; - case 0x17: /* ADDP */ - case 0x14: /* SMAXP, UMAXP */ - case 0x15: /* SMINP, UMINP */ - { - /* Pairwise operations */ - int is_q = extract32(insn, 30, 1); - int u = extract32(insn, 29, 1); - int size = extract32(insn, 22, 2); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - if (opcode == 0x17) { - if (u || (size == 3 && !is_q)) { - unallocated_encoding(s); - return; - } - } else { - if (size == 3) { - unallocated_encoding(s); - return; - } - } - handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd); - break; - } - case 0x18 ... 
0x31: - /* floating point ops, sz[1] and U are part of opcode */ - disas_simd_3same_float(s, insn); - break; - default: - disas_simd_3same_int(s, insn); - break; - } -} - -/* - * Advanced SIMD three same (ARMv8.2 FP16 variants) - * - * 31 30 29 28 24 23 22 21 20 16 15 14 13 11 10 9 5 4 0 - * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ - * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 | Rm | 0 0 | opcode | 1 | Rn | Rd | - * +---+---+---+-----------+---------+------+-----+--------+---+------+------+ - * - * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE - * (register), FACGE, FABD, FCMGT (register) and FACGT. - * - */ -static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 11, 3); - int u = extract32(insn, 29, 1); - int a = extract32(insn, 23, 1); - int is_q = extract32(insn, 30, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - /* - * For these floating point ops, the U, a and opcode bits - * together indicate the operation. - */ - int fpopcode = opcode | (a << 3) | (u << 4); - int datasize = is_q ? 128 : 64; - int elements = datasize / 16; - bool pairwise; - TCGv_ptr fpst; - int pass; - - switch (fpopcode) { - case 0x0: /* FMAXNM */ - case 0x1: /* FMLA */ - case 0x2: /* FADD */ - case 0x3: /* FMULX */ - case 0x4: /* FCMEQ */ - case 0x6: /* FMAX */ - case 0x7: /* FRECPS */ - case 0x8: /* FMINNM */ - case 0x9: /* FMLS */ - case 0xa: /* FSUB */ - case 0xe: /* FMIN */ - case 0xf: /* FRSQRTS */ - case 0x13: /* FMUL */ - case 0x14: /* FCMGE */ - case 0x15: /* FACGE */ - case 0x17: /* FDIV */ - case 0x1a: /* FABD */ - case 0x1c: /* FCMGT */ - case 0x1d: /* FACGT */ - pairwise = false; - break; - case 0x10: /* FMAXNMP */ - case 0x12: /* FADDP */ - case 0x16: /* FMAXP */ - case 0x18: /* FMINNMP */ - case 0x1e: /* FMINP */ - pairwise = true; - break; - default: - unallocated_encoding(s); - return; - } - - if (!dc_isar_feature(aa64_fp16, s)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - fpst = fpstatus_ptr(FPST_FPCR_F16); - - if (pairwise) { - int maxpass = is_q ? 8 : 4; - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res[8]; - - for (pass = 0; pass < maxpass; pass++) { - int passreg = pass < (maxpass / 2) ? 
rn : rm; - int passelt = (pass << 1) & (maxpass - 1); - - read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16); - read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16); - tcg_res[pass] = tcg_temp_new_i32(); - - switch (fpopcode) { - case 0x10: /* FMAXNMP */ - gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2, - fpst); - break; - case 0x12: /* FADDP */ - gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x16: /* FMAXP */ - gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - case 0x18: /* FMINNMP */ - gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2, - fpst); - break; - case 0x1e: /* FMINP */ - gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - } - - for (pass = 0; pass < maxpass; pass++) { - write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16); - } - } else { - for (pass = 0; pass < elements; pass++) { - TCGv_i32 tcg_op1 = tcg_temp_new_i32(); - TCGv_i32 tcg_op2 = tcg_temp_new_i32(); - TCGv_i32 tcg_res = tcg_temp_new_i32(); - - read_vec_element_i32(s, tcg_op1, rn, pass, MO_16); - read_vec_element_i32(s, tcg_op2, rm, pass, MO_16); - - switch (fpopcode) { - case 0x0: /* FMAXNM */ - gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1: /* FMLA */ - read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); - break; - case 0x2: /* FADD */ - gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x3: /* FMULX */ - gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x4: /* FCMEQ */ - gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x6: /* FMAX */ - gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x7: /* FRECPS */ - gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x8: /* FMINNM */ - gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x9: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000); - read_vec_element_i32(s, tcg_res, rd, pass, MO_16); - gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res, - fpst); - break; - case 0xa: /* FSUB */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xe: /* FMIN */ - gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0xf: /* FRSQRTS */ - gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x13: /* FMUL */ - gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x14: /* FCMGE */ - gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x15: /* FACGE */ - gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x17: /* FDIV */ - gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1a: /* FABD */ - gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst); - tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff); - break; - case 0x1c: /* FCMGT */ - gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - case 0x1d: /* FACGT */ - gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst); - break; - default: - g_assert_not_reached(); - } - - write_vec_element_i32(s, tcg_res, rd, pass, MO_16); - } - } - - clear_vec_high(s, is_q, rd); -} - /* AdvSIMD three same extra * 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0 * 
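/*
 * Illustrative sketch of the pairwise element selection in the removed FP16
 * three-same path: the 2*N source elements are the concatenation Rm:Rn and
 * result element i is op(src[2*i], src[2*i+1]).  The hypothetical helper
 * below mirrors the passreg/passelt computation: the low half of the passes
 * reads Rn, the high half reads Rm.
 */
static inline void fp16_pair_select(int pass, int maxpass,
                                    int *use_rm, int *first_elt)
{
    *use_rm = pass >= (maxpass / 2);           /* low passes from Rn, high from Rm */
    *first_elt = (pass << 1) & (maxpass - 1);  /* index of the pair's first element */
}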
+---+---+---+-----------+------+---+------+---+--------+---+----+----+ @@ -12091,16 +11359,6 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) return; } break; - case 0x3: /* SUQADD, USQADD */ - if (size == 3 && !is_q) { - unallocated_encoding(s); - return; - } - if (!fp_access_check(s)) { - return; - } - handle_2misc_satacc(s, false, u, is_q, size, rn, rd); - return; case 0x7: /* SQABS, SQNEG */ if (size == 3 && !is_q) { unallocated_encoding(s); @@ -12275,6 +11533,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) break; } default: + case 0x3: /* SUQADD, USQADD */ unallocated_encoding(s); return; } @@ -12373,10 +11632,10 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x2f: /* FABS */ - gen_helper_vfp_abss(tcg_res, tcg_op); + gen_vfp_abss(tcg_res, tcg_op); break; case 0x6f: /* FNEG */ - gen_helper_vfp_negs(tcg_res, tcg_op); + gen_vfp_negs(tcg_res, tcg_op); break; case 0x7f: /* FSQRT */ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); @@ -12783,14 +12042,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) TCGv_ptr fpst; switch (16 * u + opcode) { - case 0x08: /* MUL */ - case 0x10: /* MLA */ - case 0x14: /* MLS */ - if (is_scalar) { - unallocated_encoding(s); - return; - } - break; case 0x02: /* SMLAL, SMLAL2 */ case 0x12: /* UMLAL, UMLAL2 */ case 0x06: /* SMLSL, SMLSL2 */ @@ -12808,15 +12059,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) case 0x0b: /* SQDMULL, SQDMULL2 */ is_long = true; break; - case 0x0c: /* SQDMULH */ - case 0x0d: /* SQRDMULH */ - break; - case 0x01: /* FMLA */ - case 0x05: /* FMLS */ - case 0x09: /* FMUL */ - case 0x19: /* FMULX */ - is_fp = 1; - break; case 0x1d: /* SQRDMLAH */ case 0x1f: /* SQRDMLSH */ if (!dc_isar_feature(aa64_rdm, s)) { @@ -12871,38 +12113,28 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } is_fp = 2; break; + default: case 0x00: /* FMLAL */ + case 0x01: /* FMLA */ case 0x04: /* FMLSL */ + case 0x05: /* FMLS */ + case 0x08: /* MUL */ + case 0x09: /* FMUL */ + case 0x0c: /* SQDMULH */ + case 0x0d: /* SQRDMULH */ + case 0x10: /* MLA */ + case 0x14: /* MLS */ case 0x18: /* FMLAL2 */ + case 0x19: /* FMULX */ case 0x1c: /* FMLSL2 */ - if (is_scalar || size != MO_32 || !dc_isar_feature(aa64_fhm, s)) { - unallocated_encoding(s); - return; - } - size = MO_16; - /* is_fp, but we pass tcg_env not fp_status. */ - break; - default: unallocated_encoding(s); return; } switch (is_fp) { case 1: /* normal fp */ - /* convert insn encoded size to MemOp size */ - switch (size) { - case 0: /* half-precision */ - size = MO_16; - is_fp16 = true; - break; - case MO_32: /* single precision */ - case MO_64: /* double precision */ - break; - default: - unallocated_encoding(s); - return; - } - break; + unallocated_encoding(s); /* in decodetree */ + return; case 2: /* complex fp */ /* Each indexable element is a complex pair. */ @@ -13012,111 +12244,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) : gen_helper_gvec_fcmlah_idx); } return; - - case 0x00: /* FMLAL */ - case 0x04: /* FMLSL */ - case 0x18: /* FMLAL2 */ - case 0x1c: /* FMLSL2 */ - { - int is_s = extract32(opcode, 2, 1); - int is_2 = u; - int data = (index << 2) | (is_2 << 1) | is_s; - tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), tcg_env, - is_q ? 
16 : 8, vec_full_reg_size(s), - data, gen_helper_gvec_fmlal_idx_a64); - } - return; - - case 0x08: /* MUL */ - if (!is_long && !is_scalar) { - static gen_helper_gvec_3 * const fns[3] = { - gen_helper_gvec_mul_idx_h, - gen_helper_gvec_mul_idx_s, - gen_helper_gvec_mul_idx_d, - }; - tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - is_q ? 16 : 8, vec_full_reg_size(s), - index, fns[size - 1]); - return; - } - break; - - case 0x10: /* MLA */ - if (!is_long && !is_scalar) { - static gen_helper_gvec_4 * const fns[3] = { - gen_helper_gvec_mla_idx_h, - gen_helper_gvec_mla_idx_s, - gen_helper_gvec_mla_idx_d, - }; - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), - index, fns[size - 1]); - return; - } - break; - - case 0x14: /* MLS */ - if (!is_long && !is_scalar) { - static gen_helper_gvec_4 * const fns[3] = { - gen_helper_gvec_mls_idx_h, - gen_helper_gvec_mls_idx_s, - gen_helper_gvec_mls_idx_d, - }; - tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), - vec_full_reg_offset(s, rd), - is_q ? 16 : 8, vec_full_reg_size(s), - index, fns[size - 1]); - return; - } - break; } if (size == 3) { - TCGv_i64 tcg_idx = tcg_temp_new_i64(); - int pass; - - assert(is_fp && is_q && !is_long); - - read_vec_element(s, tcg_idx, rm, index, MO_64); - - for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { - TCGv_i64 tcg_op = tcg_temp_new_i64(); - TCGv_i64 tcg_res = tcg_temp_new_i64(); - - read_vec_element(s, tcg_op, rn, pass, MO_64); - - switch (16 * u + opcode) { - case 0x05: /* FMLS */ - /* As usual for ARM, separate negation for fused multiply-add */ - gen_helper_vfp_negd(tcg_op, tcg_op); - /* fall through */ - case 0x01: /* FMLA */ - read_vec_element(s, tcg_res, rd, pass, MO_64); - gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst); - break; - case 0x09: /* FMUL */ - gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst); - break; - case 0x19: /* FMULX */ - gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst); - break; - default: - g_assert_not_reached(); - } - - write_vec_element(s, tcg_res, rd, pass, MO_64); - } - - clear_vec_high(s, !is_scalar, rd); + g_assert_not_reached(); } else if (!is_long) { /* 32 bit floating point, or 16 or 32 bit integer. * For the 16 bit scalar case we use the usual Neon helpers and @@ -13148,7 +12279,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32); switch (16 * u + opcode) { - case 0x08: /* MUL */ case 0x10: /* MLA */ case 0x14: /* MLS */ { @@ -13172,74 +12302,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) genfn(tcg_res, tcg_op, tcg_res); break; } - case 0x05: /* FMLS */ - case 0x01: /* FMLA */ - read_vec_element_i32(s, tcg_res, rd, pass, - is_scalar ? 
size : MO_32); - switch (size) { - case 1: - if (opcode == 0x5) { - /* As usual for ARM, separate negation for fused - * multiply-add */ - tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000); - } - if (is_scalar) { - gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); - } else { - gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); - } - break; - case 2: - if (opcode == 0x5) { - /* As usual for ARM, separate negation for - * fused multiply-add */ - tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000); - } - gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, - tcg_res, fpst); - break; - default: - g_assert_not_reached(); - } - break; - case 0x09: /* FMUL */ - switch (size) { - case 1: - if (is_scalar) { - gen_helper_advsimd_mulh(tcg_res, tcg_op, - tcg_idx, fpst); - } else { - gen_helper_advsimd_mul2h(tcg_res, tcg_op, - tcg_idx, fpst); - } - break; - case 2: - gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst); - break; - default: - g_assert_not_reached(); - } - break; - case 0x19: /* FMULX */ - switch (size) { - case 1: - if (is_scalar) { - gen_helper_advsimd_mulxh(tcg_res, tcg_op, - tcg_idx, fpst); - } else { - gen_helper_advsimd_mulx2h(tcg_res, tcg_op, - tcg_idx, fpst); - } - break; - case 2: - gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst); - break; - default: - g_assert_not_reached(); - } - break; case 0x0c: /* SQDMULH */ if (size == 1) { gen_helper_neon_qdmulh_s16(tcg_res, tcg_env, @@ -13281,6 +12343,10 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } break; default: + case 0x01: /* FMLA */ + case 0x05: /* FMLS */ + case 0x09: /* FMUL */ + case 0x19: /* FMULX */ g_assert_not_reached(); } @@ -13454,461 +12520,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn) } } -/* Crypto AES - * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----------------+------+-----------+--------+-----+------+------+ - * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | - * +-----------------+------+-----------+--------+-----+------+------+ - */ -static void disas_crypto_aes(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - gen_helper_gvec_2 *genfn2 = NULL; - gen_helper_gvec_3 *genfn3 = NULL; - - if (!dc_isar_feature(aa64_aes, s) || size != 0) { - unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0x4: /* AESE */ - genfn3 = gen_helper_crypto_aese; - break; - case 0x6: /* AESMC */ - genfn2 = gen_helper_crypto_aesmc; - break; - case 0x5: /* AESD */ - genfn3 = gen_helper_crypto_aesd; - break; - case 0x7: /* AESIMC */ - genfn2 = gen_helper_crypto_aesimc; - break; - default: - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - if (genfn2) { - gen_gvec_op2_ool(s, true, rd, rn, 0, genfn2); - } else { - gen_gvec_op3_ool(s, true, rd, rd, rn, 0, genfn3); - } -} - -/* Crypto three-reg SHA - * 31 24 23 22 21 20 16 15 14 12 11 10 9 5 4 0 - * +-----------------+------+---+------+---+--------+-----+------+------+ - * | 0 1 0 1 1 1 1 0 | size | 0 | Rm | 0 | opcode | 0 0 | Rn | Rd | - * +-----------------+------+---+------+---+--------+-----+------+------+ - */ -static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 3); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - gen_helper_gvec_3 *genfn; - bool feature; - - if (size != 0) { 
- unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0: /* SHA1C */ - genfn = gen_helper_crypto_sha1c; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 1: /* SHA1P */ - genfn = gen_helper_crypto_sha1p; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 2: /* SHA1M */ - genfn = gen_helper_crypto_sha1m; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 3: /* SHA1SU0 */ - genfn = gen_helper_crypto_sha1su0; - feature = dc_isar_feature(aa64_sha1, s); - break; - case 4: /* SHA256H */ - genfn = gen_helper_crypto_sha256h; - feature = dc_isar_feature(aa64_sha256, s); - break; - case 5: /* SHA256H2 */ - genfn = gen_helper_crypto_sha256h2; - feature = dc_isar_feature(aa64_sha256, s); - break; - case 6: /* SHA256SU1 */ - genfn = gen_helper_crypto_sha256su1; - feature = dc_isar_feature(aa64_sha256, s); - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - gen_gvec_op3_ool(s, true, rd, rn, rm, 0, genfn); -} - -/* Crypto two-reg SHA - * 31 24 23 22 21 17 16 12 11 10 9 5 4 0 - * +-----------------+------+-----------+--------+-----+------+------+ - * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 | Rn | Rd | - * +-----------------+------+-----------+--------+-----+------+------+ - */ -static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn) -{ - int size = extract32(insn, 22, 2); - int opcode = extract32(insn, 12, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - gen_helper_gvec_2 *genfn; - bool feature; - - if (size != 0) { - unallocated_encoding(s); - return; - } - - switch (opcode) { - case 0: /* SHA1H */ - feature = dc_isar_feature(aa64_sha1, s); - genfn = gen_helper_crypto_sha1h; - break; - case 1: /* SHA1SU1 */ - feature = dc_isar_feature(aa64_sha1, s); - genfn = gen_helper_crypto_sha1su1; - break; - case 2: /* SHA256SU0 */ - feature = dc_isar_feature(aa64_sha256, s); - genfn = gen_helper_crypto_sha256su0; - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - gen_gvec_op2_ool(s, true, rd, rn, 0, genfn); -} - -static void gen_rax1_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) -{ - tcg_gen_rotli_i64(d, m, 1); - tcg_gen_xor_i64(d, d, n); -} - -static void gen_rax1_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m) -{ - tcg_gen_rotli_vec(vece, d, m, 1); - tcg_gen_xor_vec(vece, d, d, n); -} - -void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { INDEX_op_rotli_vec, 0 }; - static const GVecGen3 op = { - .fni8 = gen_rax1_i64, - .fniv = gen_rax1_vec, - .opt_opc = vecop_list, - .fno = gen_helper_crypto_rax1, - .vece = MO_64, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &op); -} - -/* Crypto three-reg SHA512 - * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 - * +-----------------------+------+---+---+-----+--------+------+------+ - * | 1 1 0 0 1 1 1 0 0 1 1 | Rm | 1 | O | 0 0 | opcode | Rn | Rd | - * +-----------------------+------+---+---+-----+--------+------+------+ - */ -static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 10, 2); - int o = extract32(insn, 14, 1); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool feature; - gen_helper_gvec_3 *oolfn = NULL; - 
GVecGen3Fn *gvecfn = NULL; - - if (o == 0) { - switch (opcode) { - case 0: /* SHA512H */ - feature = dc_isar_feature(aa64_sha512, s); - oolfn = gen_helper_crypto_sha512h; - break; - case 1: /* SHA512H2 */ - feature = dc_isar_feature(aa64_sha512, s); - oolfn = gen_helper_crypto_sha512h2; - break; - case 2: /* SHA512SU1 */ - feature = dc_isar_feature(aa64_sha512, s); - oolfn = gen_helper_crypto_sha512su1; - break; - case 3: /* RAX1 */ - feature = dc_isar_feature(aa64_sha3, s); - gvecfn = gen_gvec_rax1; - break; - default: - g_assert_not_reached(); - } - } else { - switch (opcode) { - case 0: /* SM3PARTW1 */ - feature = dc_isar_feature(aa64_sm3, s); - oolfn = gen_helper_crypto_sm3partw1; - break; - case 1: /* SM3PARTW2 */ - feature = dc_isar_feature(aa64_sm3, s); - oolfn = gen_helper_crypto_sm3partw2; - break; - case 2: /* SM4EKEY */ - feature = dc_isar_feature(aa64_sm4, s); - oolfn = gen_helper_crypto_sm4ekey; - break; - default: - unallocated_encoding(s); - return; - } - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - if (oolfn) { - gen_gvec_op3_ool(s, true, rd, rn, rm, 0, oolfn); - } else { - gen_gvec_fn3(s, true, rd, rn, rm, gvecfn, MO_64); - } -} - -/* Crypto two-reg SHA512 - * 31 12 11 10 9 5 4 0 - * +-----------------------------------------+--------+------+------+ - * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode | Rn | Rd | - * +-----------------------------------------+--------+------+------+ - */ -static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn) -{ - int opcode = extract32(insn, 10, 2); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool feature; - - switch (opcode) { - case 0: /* SHA512SU0 */ - feature = dc_isar_feature(aa64_sha512, s); - break; - case 1: /* SM4E */ - feature = dc_isar_feature(aa64_sm4, s); - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - switch (opcode) { - case 0: /* SHA512SU0 */ - gen_gvec_op2_ool(s, true, rd, rn, 0, gen_helper_crypto_sha512su0); - break; - case 1: /* SM4E */ - gen_gvec_op3_ool(s, true, rd, rd, rn, 0, gen_helper_crypto_sm4e); - break; - default: - g_assert_not_reached(); - } -} - -/* Crypto four-register - * 31 23 22 21 20 16 15 14 10 9 5 4 0 - * +-------------------+-----+------+---+------+------+------+ - * | 1 1 0 0 1 1 1 0 0 | Op0 | Rm | 0 | Ra | Rn | Rd | - * +-------------------+-----+------+---+------+------+------+ - */ -static void disas_crypto_four_reg(DisasContext *s, uint32_t insn) -{ - int op0 = extract32(insn, 21, 2); - int rm = extract32(insn, 16, 5); - int ra = extract32(insn, 10, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - bool feature; - - switch (op0) { - case 0: /* EOR3 */ - case 1: /* BCAX */ - feature = dc_isar_feature(aa64_sha3, s); - break; - case 2: /* SM3SS1 */ - feature = dc_isar_feature(aa64_sm3, s); - break; - default: - unallocated_encoding(s); - return; - } - - if (!feature) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - if (op0 < 2) { - TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2]; - int pass; - - tcg_op1 = tcg_temp_new_i64(); - tcg_op2 = tcg_temp_new_i64(); - tcg_op3 = tcg_temp_new_i64(); - tcg_res[0] = tcg_temp_new_i64(); - tcg_res[1] = tcg_temp_new_i64(); - - for (pass = 0; pass < 2; pass++) { - read_vec_element(s, tcg_op1, rn, pass, MO_64); - read_vec_element(s, tcg_op2, rm, pass, MO_64); - 
read_vec_element(s, tcg_op3, ra, pass, MO_64); - - if (op0 == 0) { - /* EOR3 */ - tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3); - } else { - /* BCAX */ - tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3); - } - tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1); - } - write_vec_element(s, tcg_res[0], rd, 0, MO_64); - write_vec_element(s, tcg_res[1], rd, 1, MO_64); - } else { - TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero; - - tcg_op1 = tcg_temp_new_i32(); - tcg_op2 = tcg_temp_new_i32(); - tcg_op3 = tcg_temp_new_i32(); - tcg_res = tcg_temp_new_i32(); - tcg_zero = tcg_constant_i32(0); - - read_vec_element_i32(s, tcg_op1, rn, 3, MO_32); - read_vec_element_i32(s, tcg_op2, rm, 3, MO_32); - read_vec_element_i32(s, tcg_op3, ra, 3, MO_32); - - tcg_gen_rotri_i32(tcg_res, tcg_op1, 20); - tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2); - tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3); - tcg_gen_rotri_i32(tcg_res, tcg_res, 25); - - write_vec_element_i32(s, tcg_zero, rd, 0, MO_32); - write_vec_element_i32(s, tcg_zero, rd, 1, MO_32); - write_vec_element_i32(s, tcg_zero, rd, 2, MO_32); - write_vec_element_i32(s, tcg_res, rd, 3, MO_32); - } -} - -/* Crypto XAR - * 31 21 20 16 15 10 9 5 4 0 - * +-----------------------+------+--------+------+------+ - * | 1 1 0 0 1 1 1 0 1 0 0 | Rm | imm6 | Rn | Rd | - * +-----------------------+------+--------+------+------+ - */ -static void disas_crypto_xar(DisasContext *s, uint32_t insn) -{ - int rm = extract32(insn, 16, 5); - int imm6 = extract32(insn, 10, 6); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - if (!dc_isar_feature(aa64_sha3, s)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - gen_gvec_xar(MO_64, vec_full_reg_offset(s, rd), - vec_full_reg_offset(s, rn), - vec_full_reg_offset(s, rm), imm6, 16, - vec_full_reg_size(s)); -} - -/* Crypto three-reg imm2 - * 31 21 20 16 15 14 13 12 11 10 9 5 4 0 - * +-----------------------+------+-----+------+--------+------+------+ - * | 1 1 0 0 1 1 1 0 0 1 0 | Rm | 1 0 | imm2 | opcode | Rn | Rd | - * +-----------------------+------+-----+------+--------+------+------+ - */ -static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) -{ - static gen_helper_gvec_3 * const fns[4] = { - gen_helper_crypto_sm3tt1a, gen_helper_crypto_sm3tt1b, - gen_helper_crypto_sm3tt2a, gen_helper_crypto_sm3tt2b, - }; - int opcode = extract32(insn, 10, 2); - int imm2 = extract32(insn, 12, 2); - int rm = extract32(insn, 16, 5); - int rn = extract32(insn, 5, 5); - int rd = extract32(insn, 0, 5); - - if (!dc_isar_feature(aa64_sm3, s)) { - unallocated_encoding(s); - return; - } - - if (!fp_access_check(s)) { - return; - } - - gen_gvec_op3_ool(s, true, rd, rn, rm, imm2, fns[opcode]); -} - /* C3.6 Data processing - SIMD, inc Crypto * * As the decode gets a little complex we are using a table based @@ -13916,12 +12527,10 @@ static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn) */ static const AArch64DecodeTable data_proc_simd[] = { /* pattern , mask , fn */ - { 0x0e200400, 0x9f200400, disas_simd_three_reg_same }, { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra }, { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff }, { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc }, { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes }, - { 0x0e000400, 0x9fe08400, disas_simd_copy }, { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */ /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */ { 0x0f000400, 0x9ff80400, 
disas_simd_mod_imm }, @@ -13929,25 +12538,12 @@ static const AArch64DecodeTable data_proc_simd[] = { { 0x0e000000, 0xbf208c00, disas_simd_tb }, { 0x0e000800, 0xbf208c00, disas_simd_zip_trn }, { 0x2e000000, 0xbf208400, disas_simd_ext }, - { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same }, { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra }, { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff }, { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc }, - { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise }, - { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy }, { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */ { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm }, - { 0x4e280800, 0xff3e0c00, disas_crypto_aes }, - { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha }, - { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha }, - { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 }, - { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 }, - { 0xce000000, 0xff808000, disas_crypto_four_reg }, - { 0xce800000, 0xffe00000, disas_crypto_xar }, - { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 }, - { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 }, { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 }, - { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 }, { 0x00000000, 0x00000000, NULL } }; @@ -14382,20 +12978,10 @@ static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void aarch64_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - DisasContext *dc = container_of(dcbase, DisasContext, base); - - fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); - target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); -} - const TranslatorOps aarch64_translator_ops = { .init_disas_context = aarch64_tr_init_disas_context, .tb_start = aarch64_tr_tb_start, .insn_start = aarch64_tr_insn_start, .translate_insn = aarch64_tr_translate_insn, .tb_stop = aarch64_tr_tb_stop, - .disas_log = aarch64_tr_disas_log, }; diff --git a/target/arm/tcg/translate-a64.h b/target/arm/tcg/translate-a64.h index 7b811b8ac5..0fcf7cb63a 100644 --- a/target/arm/tcg/translate-a64.h +++ b/target/arm/tcg/translate-a64.h @@ -193,6 +193,24 @@ void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz); +void gen_gvec_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, + uint32_t a, uint32_t oprsz, uint32_t maxsz); + +void gen_suqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_suqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_suqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); + +void gen_usqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_usqadd_d(TCGv_i64 res, TCGv_i64 qc, TCGv_i64 a, TCGv_i64 b); +void gen_gvec_usqadd_qc(unsigned vece, uint32_t rd_ofs, + uint32_t rn_ofs, uint32_t rm_ofs, + uint32_t opr_sz, uint32_t max_sz); void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int imm); diff --git a/target/arm/tcg/translate-neon.c b/target/arm/tcg/translate-neon.c 
index 144f18ba22..915c9e56db 100644 --- a/target/arm/tcg/translate-neon.c +++ b/target/arm/tcg/translate-neon.c @@ -794,6 +794,12 @@ DO_3SAME(VQADD_S, gen_gvec_sqadd_qc) DO_3SAME(VQADD_U, gen_gvec_uqadd_qc) DO_3SAME(VQSUB_S, gen_gvec_sqsub_qc) DO_3SAME(VQSUB_U, gen_gvec_uqsub_qc) +DO_3SAME(VRSHL_S, gen_gvec_srshl) +DO_3SAME(VRSHL_U, gen_gvec_urshl) +DO_3SAME(VQSHL_S, gen_neon_sqshl) +DO_3SAME(VQSHL_U, gen_neon_uqshl) +DO_3SAME(VQRSHL_S, gen_neon_sqrshl) +DO_3SAME(VQRSHL_U, gen_neon_uqrshl) /* These insns are all gvec_bitsel but with the inputs in various orders. */ #define DO_3SAME_BITSEL(INSN, O1, O2, O3) \ @@ -830,6 +836,17 @@ DO_3SAME_NO_SZ_3(VABD_S, gen_gvec_sabd) DO_3SAME_NO_SZ_3(VABA_S, gen_gvec_saba) DO_3SAME_NO_SZ_3(VABD_U, gen_gvec_uabd) DO_3SAME_NO_SZ_3(VABA_U, gen_gvec_uaba) +DO_3SAME_NO_SZ_3(VPADD, gen_gvec_addp) +DO_3SAME_NO_SZ_3(VPMAX_S, gen_gvec_smaxp) +DO_3SAME_NO_SZ_3(VPMIN_S, gen_gvec_sminp) +DO_3SAME_NO_SZ_3(VPMAX_U, gen_gvec_umaxp) +DO_3SAME_NO_SZ_3(VPMIN_U, gen_gvec_uminp) +DO_3SAME_NO_SZ_3(VHADD_S, gen_gvec_shadd) +DO_3SAME_NO_SZ_3(VHADD_U, gen_gvec_uhadd) +DO_3SAME_NO_SZ_3(VHSUB_S, gen_gvec_shsub) +DO_3SAME_NO_SZ_3(VHSUB_U, gen_gvec_uhsub) +DO_3SAME_NO_SZ_3(VRHADD_S, gen_gvec_srhadd) +DO_3SAME_NO_SZ_3(VRHADD_U, gen_gvec_urhadd) #define DO_3SAME_CMP(INSN, COND) \ static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ @@ -907,51 +924,6 @@ DO_SHA2(SHA256H, gen_helper_crypto_sha256h) DO_SHA2(SHA256H2, gen_helper_crypto_sha256h2) DO_SHA2(SHA256SU1, gen_helper_crypto_sha256su1) -#define DO_3SAME_64(INSN, FUNC) \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 op = { .fni8 = FUNC }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &op); \ - } \ - DO_3SAME(INSN, gen_##INSN##_3s) - -#define DO_3SAME_64_ENV(INSN, FUNC) \ - static void gen_##INSN##_elt(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m) \ - { \ - FUNC(d, tcg_env, n, m); \ - } \ - DO_3SAME_64(INSN, gen_##INSN##_elt) - -DO_3SAME_64(VRSHL_S64, gen_helper_neon_rshl_s64) -DO_3SAME_64(VRSHL_U64, gen_helper_neon_rshl_u64) -DO_3SAME_64_ENV(VQSHL_S64, gen_helper_neon_qshl_s64) -DO_3SAME_64_ENV(VQSHL_U64, gen_helper_neon_qshl_u64) -DO_3SAME_64_ENV(VQRSHL_S64, gen_helper_neon_qrshl_s64) -DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) - -#define DO_3SAME_32(INSN, FUNC) \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 ops[4] = { \ - { .fni4 = gen_helper_neon_##FUNC##8 }, \ - { .fni4 = gen_helper_neon_##FUNC##16 }, \ - { .fni4 = gen_helper_neon_##FUNC##32 }, \ - { 0 }, \ - }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ - } \ - static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size > 2) { \ - return false; \ - } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } - /* * Some helper functions need to be passed the tcg_env. 
In order * to use those with the gvec APIs like tcg_gen_gvec_3() we need @@ -964,143 +936,12 @@ DO_3SAME_64_ENV(VQRSHL_U64, gen_helper_neon_qrshl_u64) FUNC(d, tcg_env, n, m); \ } -#define DO_3SAME_32_ENV(INSN, FUNC) \ - WRAP_ENV_FN(gen_##INSN##_tramp8, gen_helper_neon_##FUNC##8); \ - WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##16); \ - WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##32); \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 ops[4] = { \ - { .fni4 = gen_##INSN##_tramp8 }, \ - { .fni4 = gen_##INSN##_tramp16 }, \ - { .fni4 = gen_##INSN##_tramp32 }, \ - { 0 }, \ - }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece]); \ - } \ - static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size > 2) { \ - return false; \ - } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } - -DO_3SAME_32(VHADD_S, hadd_s) -DO_3SAME_32(VHADD_U, hadd_u) -DO_3SAME_32(VHSUB_S, hsub_s) -DO_3SAME_32(VHSUB_U, hsub_u) -DO_3SAME_32(VRHADD_S, rhadd_s) -DO_3SAME_32(VRHADD_U, rhadd_u) -DO_3SAME_32(VRSHL_S, rshl_s) -DO_3SAME_32(VRSHL_U, rshl_u) - -DO_3SAME_32_ENV(VQSHL_S, qshl_s) -DO_3SAME_32_ENV(VQSHL_U, qshl_u) -DO_3SAME_32_ENV(VQRSHL_S, qrshl_s) -DO_3SAME_32_ENV(VQRSHL_U, qrshl_u) - -static bool do_3same_pair(DisasContext *s, arg_3same *a, NeonGenTwoOpFn *fn) -{ - /* Operations handled pairwise 32 bits at a time */ - TCGv_i32 tmp, tmp2, tmp3; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if (a->size == 3) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - assert(a->q == 0); /* enforced by decode patterns */ - - /* - * Note that we have to be careful not to clobber the source operands - * in the "vm == vd" case by storing the result of the first pass too - * early. Since Q is 0 there are always just two passes, so instead - * of a complicated loop over each pass we just unroll. - */ - tmp = tcg_temp_new_i32(); - tmp2 = tcg_temp_new_i32(); - tmp3 = tcg_temp_new_i32(); - - read_neon_element32(tmp, a->vn, 0, MO_32); - read_neon_element32(tmp2, a->vn, 1, MO_32); - fn(tmp, tmp, tmp2); - - read_neon_element32(tmp3, a->vm, 0, MO_32); - read_neon_element32(tmp2, a->vm, 1, MO_32); - fn(tmp3, tmp3, tmp2); - - write_neon_element32(tmp, a->vd, 0, MO_32); - write_neon_element32(tmp3, a->vd, 1, MO_32); - - return true; -} - -#define DO_3SAME_PAIR(INSN, func) \ - static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - static NeonGenTwoOpFn * const fns[] = { \ - gen_helper_neon_##func##8, \ - gen_helper_neon_##func##16, \ - gen_helper_neon_##func##32, \ - }; \ - if (a->size > 2) { \ - return false; \ - } \ - return do_3same_pair(s, a, fns[a->size]); \ - } - -/* 32-bit pairwise ops end up the same as the elementwise versions. 
*/ -#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32 -#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32 -#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32 -#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32 -#define gen_helper_neon_padd_u32 tcg_gen_add_i32 - -DO_3SAME_PAIR(VPMAX_S, pmax_s) -DO_3SAME_PAIR(VPMIN_S, pmin_s) -DO_3SAME_PAIR(VPMAX_U, pmax_u) -DO_3SAME_PAIR(VPMIN_U, pmin_u) -DO_3SAME_PAIR(VPADD, padd_u) - #define DO_3SAME_VQDMULH(INSN, FUNC) \ - WRAP_ENV_FN(gen_##INSN##_tramp16, gen_helper_neon_##FUNC##_s16); \ - WRAP_ENV_FN(gen_##INSN##_tramp32, gen_helper_neon_##FUNC##_s32); \ - static void gen_##INSN##_3s(unsigned vece, uint32_t rd_ofs, \ - uint32_t rn_ofs, uint32_t rm_ofs, \ - uint32_t oprsz, uint32_t maxsz) \ - { \ - static const GVecGen3 ops[2] = { \ - { .fni4 = gen_##INSN##_tramp16 }, \ - { .fni4 = gen_##INSN##_tramp32 }, \ - }; \ - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, oprsz, maxsz, &ops[vece - 1]); \ - } \ static bool trans_##INSN##_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size != 1 && a->size != 2) { \ - return false; \ - } \ - return do_3same(s, a, gen_##INSN##_3s); \ - } + { return a->size >= 1 && a->size <= 2 && do_3same(s, a, FUNC); } -DO_3SAME_VQDMULH(VQDMULH, qdmulh) -DO_3SAME_VQDMULH(VQRDMULH, qrdmulh) +DO_3SAME_VQDMULH(VQDMULH, gen_gvec_sqdmulh_qc) +DO_3SAME_VQDMULH(VQRDMULH, gen_gvec_sqrdmulh_qc) #define WRAP_FP_GVEC(WRAPNAME, FPST, FUNC) \ static void WRAPNAME(unsigned vece, uint32_t rd_ofs, \ @@ -1144,6 +985,9 @@ DO_3S_FP_GVEC(VFMA, gen_helper_gvec_vfma_s, gen_helper_gvec_vfma_h) DO_3S_FP_GVEC(VFMS, gen_helper_gvec_vfms_s, gen_helper_gvec_vfms_h) DO_3S_FP_GVEC(VRECPS, gen_helper_gvec_recps_nf_s, gen_helper_gvec_recps_nf_h) DO_3S_FP_GVEC(VRSQRTS, gen_helper_gvec_rsqrts_nf_s, gen_helper_gvec_rsqrts_nf_h) +DO_3S_FP_GVEC(VPADD, gen_helper_gvec_faddp_s, gen_helper_gvec_faddp_h) +DO_3S_FP_GVEC(VPMAX, gen_helper_gvec_fmaxp_s, gen_helper_gvec_fmaxp_h) +DO_3S_FP_GVEC(VPMIN, gen_helper_gvec_fminp_s, gen_helper_gvec_fminp_h) WRAP_FP_GVEC(gen_VMAXNM_fp32_3s, FPST_STD, gen_helper_gvec_fmaxnum_s) WRAP_FP_GVEC(gen_VMAXNM_fp16_3s, FPST_STD_F16, gen_helper_gvec_fmaxnum_h) @@ -1180,58 +1024,6 @@ static bool trans_VMINNM_fp_3s(DisasContext *s, arg_3same *a) return do_3same(s, a, gen_VMINNM_fp32_3s); } -static bool do_3same_fp_pair(DisasContext *s, arg_3same *a, - gen_helper_gvec_3_ptr *fn) -{ - /* FP pairwise operations */ - TCGv_ptr fpstatus; - - if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { - return false; - } - - /* UNDEF accesses to D16-D31 if they don't exist. */ - if (!dc_isar_feature(aa32_simd_r32, s) && - ((a->vd | a->vn | a->vm) & 0x10)) { - return false; - } - - if (!vfp_access_check(s)) { - return true; - } - - assert(a->q == 0); /* enforced by decode patterns */ - - - fpstatus = fpstatus_ptr(a->size == MO_16 ? FPST_STD_F16 : FPST_STD); - tcg_gen_gvec_3_ptr(vfp_reg_offset(1, a->vd), - vfp_reg_offset(1, a->vn), - vfp_reg_offset(1, a->vm), - fpstatus, 8, 8, 0, fn); - - return true; -} - -/* - * For all the functions using this macro, size == 1 means fp16, - * which is an architecture extension we don't implement yet. 
- */ -#define DO_3S_FP_PAIR(INSN,FUNC) \ - static bool trans_##INSN##_fp_3s(DisasContext *s, arg_3same *a) \ - { \ - if (a->size == MO_16) { \ - if (!dc_isar_feature(aa32_fp16_arith, s)) { \ - return false; \ - } \ - return do_3same_fp_pair(s, a, FUNC##h); \ - } \ - return do_3same_fp_pair(s, a, FUNC##s); \ - } - -DO_3S_FP_PAIR(VPADD, gen_helper_neon_padd) -DO_3S_FP_PAIR(VPMAX, gen_helper_neon_pmax) -DO_3S_FP_PAIR(VPMIN, gen_helper_neon_pmin) - static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) { /* Handle a 2-reg-shift insn which can be vectorized. */ diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index ada05aa530..798ab2bfb1 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -527,94 +527,6 @@ TRANS_FEAT(ORR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_or, a) TRANS_FEAT(EOR_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_xor, a) TRANS_FEAT(BIC_zzz, aa64_sve, gen_gvec_fn_arg_zzz, tcg_gen_gvec_andc, a) -static void gen_xar8_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - uint64_t mask = dup_const(MO_8, 0xff >> sh); - - tcg_gen_xor_i64(t, n, m); - tcg_gen_shri_i64(d, t, sh); - tcg_gen_shli_i64(t, t, 8 - sh); - tcg_gen_andi_i64(d, d, mask); - tcg_gen_andi_i64(t, t, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_xar16_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - uint64_t mask = dup_const(MO_16, 0xffff >> sh); - - tcg_gen_xor_i64(t, n, m); - tcg_gen_shri_i64(d, t, sh); - tcg_gen_shli_i64(t, t, 16 - sh); - tcg_gen_andi_i64(d, d, mask); - tcg_gen_andi_i64(t, t, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_xar_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, int32_t sh) -{ - tcg_gen_xor_i32(d, n, m); - tcg_gen_rotri_i32(d, d, sh); -} - -static void gen_xar_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, int64_t sh) -{ - tcg_gen_xor_i64(d, n, m); - tcg_gen_rotri_i64(d, d, sh); -} - -static void gen_xar_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, int64_t sh) -{ - tcg_gen_xor_vec(vece, d, n, m); - tcg_gen_rotri_vec(vece, d, d, sh); -} - -void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, int64_t shift, - uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop[] = { INDEX_op_rotli_vec, 0 }; - static const GVecGen3i ops[4] = { - { .fni8 = gen_xar8_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_b, - .opt_opc = vecop, - .vece = MO_8 }, - { .fni8 = gen_xar16_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_h, - .opt_opc = vecop, - .vece = MO_16 }, - { .fni4 = gen_xar_i32, - .fniv = gen_xar_vec, - .fno = gen_helper_sve2_xar_s, - .opt_opc = vecop, - .vece = MO_32 }, - { .fni8 = gen_xar_i64, - .fniv = gen_xar_vec, - .fno = gen_helper_gvec_xar_d, - .opt_opc = vecop, - .vece = MO_64 } - }; - int esize = 8 << vece; - - /* The SVE2 range is 1 .. esize; the AdvSIMD range is 0 .. esize-1. */ - tcg_debug_assert(shift >= 0); - tcg_debug_assert(shift <= esize); - shift &= esize - 1; - - if (shift == 0) { - /* xar with no rotate devolves to xor. 
*/ - tcg_gen_gvec_xor(vece, rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz); - } else { - tcg_gen_gvec_3i(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, - shift, &ops[vece]); - } -} - static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) { if (a->esz < 0 || !dc_isar_feature(aa64_sve2, s)) { @@ -629,61 +541,8 @@ static bool trans_XAR(DisasContext *s, arg_rrri_esz *a) return true; } -static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) -{ - tcg_gen_xor_i64(d, n, m); - tcg_gen_xor_i64(d, d, k); -} - -static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, TCGv_vec k) -{ - tcg_gen_xor_vec(vece, d, n, m); - tcg_gen_xor_vec(vece, d, d, k); -} - -static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m, - uint32_t a, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen4 op = { - .fni8 = gen_eor3_i64, - .fniv = gen_eor3_vec, - .fno = gen_helper_sve2_eor3, - .vece = MO_64, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); -} - -TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_eor3, a) - -static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) -{ - tcg_gen_andc_i64(d, m, k); - tcg_gen_xor_i64(d, d, n); -} - -static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n, - TCGv_vec m, TCGv_vec k) -{ - tcg_gen_andc_vec(vece, d, m, k); - tcg_gen_xor_vec(vece, d, d, n); -} - -static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m, - uint32_t a, uint32_t oprsz, uint32_t maxsz) -{ - static const GVecGen4 op = { - .fni8 = gen_bcax_i64, - .fniv = gen_bcax_vec, - .fno = gen_helper_sve2_bcax, - .vece = MO_64, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - }; - tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op); -} - -TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_bcax, a) +TRANS_FEAT(EOR3, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_eor3, a) +TRANS_FEAT(BCAX, aa64_sve2, gen_gvec_fn_arg_zzzz, gen_gvec_bcax, a) static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m, uint32_t a, uint32_t oprsz, uint32_t maxsz) diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c index b9af03b7c3..39ec971ff7 100644 --- a/target/arm/tcg/translate-vfp.c +++ b/target/arm/tcg/translate-vfp.c @@ -48,6 +48,12 @@ static inline void vfp_store_reg32(TCGv_i32 var, int reg) tcg_gen_st_i32(var, tcg_env, vfp_reg_offset(false, reg)); } +static inline void vfp_load_reg16(TCGv_i32 var, int reg) +{ + tcg_gen_ld16u_i32(var, tcg_env, + vfp_reg_offset(false, reg) + HOST_BIG_ENDIAN * 2); +} + /* * The imm8 encodes the sign bit, enough bits to represent an exponent in * the range 01....1xx to 10....0xx, and the most significant 4 bits of @@ -902,8 +908,7 @@ static bool trans_VMOV_half(DisasContext *s, arg_VMOV_single *a) if (a->l) { /* VFP to general purpose register */ tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vn); - tcg_gen_andi_i32(tmp, tmp, 0xffff); + vfp_load_reg16(tmp, a->vn); store_reg(s, a->rt, tmp); } else { /* general purpose register to VFP */ @@ -1453,11 +1458,11 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn, fd = tcg_temp_new_i32(); fpst = fpstatus_ptr(FPST_FPCR_F16); - vfp_load_reg32(f0, vn); - vfp_load_reg32(f1, vm); + vfp_load_reg16(f0, vn); + vfp_load_reg16(f1, vm); if (reads_vd) { - vfp_load_reg32(fd, vd); + vfp_load_reg16(fd, vd); } fn(fd, f0, f1, fpst); vfp_store_reg32(fd, vd); @@ -1633,7 +1638,7 @@ static bool do_vfp_2op_hp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm) } f0 = tcg_temp_new_i32(); - vfp_load_reg32(f0, vm); + 
vfp_load_reg16(f0, vm); fn(f0, f0); vfp_store_reg32(f0, vd); @@ -1763,7 +1768,7 @@ static void gen_VMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_mulh(tmp, vn, vm, fpst); - gen_helper_vfp_negh(tmp, tmp); + gen_vfp_negh(tmp, tmp); gen_helper_vfp_addh(vd, vd, tmp, fpst); } @@ -1781,7 +1786,7 @@ static void gen_VMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_muls(tmp, vn, vm, fpst); - gen_helper_vfp_negs(tmp, tmp); + gen_vfp_negs(tmp, tmp); gen_helper_vfp_adds(vd, vd, tmp, fpst); } @@ -1799,7 +1804,7 @@ static void gen_VMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) TCGv_i64 tmp = tcg_temp_new_i64(); gen_helper_vfp_muld(tmp, vn, vm, fpst); - gen_helper_vfp_negd(tmp, tmp); + gen_vfp_negd(tmp, tmp); gen_helper_vfp_addd(vd, vd, tmp, fpst); } @@ -1819,7 +1824,7 @@ static void gen_VNMLS_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_mulh(tmp, vn, vm, fpst); - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(vd, vd); gen_helper_vfp_addh(vd, vd, tmp, fpst); } @@ -1839,7 +1844,7 @@ static void gen_VNMLS_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_muls(tmp, vn, vm, fpst); - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(vd, vd); gen_helper_vfp_adds(vd, vd, tmp, fpst); } @@ -1859,7 +1864,7 @@ static void gen_VNMLS_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) TCGv_i64 tmp = tcg_temp_new_i64(); gen_helper_vfp_muld(tmp, vn, vm, fpst); - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(vd, vd); gen_helper_vfp_addd(vd, vd, tmp, fpst); } @@ -1874,8 +1879,8 @@ static void gen_VNMLA_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_mulh(tmp, vn, vm, fpst); - gen_helper_vfp_negh(tmp, tmp); - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(tmp, tmp); + gen_vfp_negh(vd, vd); gen_helper_vfp_addh(vd, vd, tmp, fpst); } @@ -1890,8 +1895,8 @@ static void gen_VNMLA_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) TCGv_i32 tmp = tcg_temp_new_i32(); gen_helper_vfp_muls(tmp, vn, vm, fpst); - gen_helper_vfp_negs(tmp, tmp); - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(tmp, tmp); + gen_vfp_negs(vd, vd); gen_helper_vfp_adds(vd, vd, tmp, fpst); } @@ -1906,8 +1911,8 @@ static void gen_VNMLA_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) TCGv_i64 tmp = tcg_temp_new_i64(); gen_helper_vfp_muld(tmp, vn, vm, fpst); - gen_helper_vfp_negd(tmp, tmp); - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(tmp, tmp); + gen_vfp_negd(vd, vd); gen_helper_vfp_addd(vd, vd, tmp, fpst); } @@ -1935,7 +1940,7 @@ static void gen_VNMUL_hp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ gen_helper_vfp_mulh(vd, vn, vm, fpst); - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(vd, vd); } static bool trans_VNMUL_hp(DisasContext *s, arg_VNMUL_sp *a) @@ -1947,7 +1952,7 @@ static void gen_VNMUL_sp(TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ gen_helper_vfp_muls(vd, vn, vm, fpst); - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(vd, vd); } static bool trans_VNMUL_sp(DisasContext *s, arg_VNMUL_sp *a) @@ -1959,7 +1964,7 @@ static void gen_VNMUL_dp(TCGv_i64 vd, TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst) { /* VNMUL: -(fn * fm) */ gen_helper_vfp_muld(vd, vn, vm, fpst); - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(vd, vd); } static bool trans_VNMUL_dp(DisasContext *s, arg_VNMUL_dp 
*a) @@ -2106,16 +2111,16 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) vm = tcg_temp_new_i32(); vd = tcg_temp_new_i32(); - vfp_load_reg32(vn, a->vn); - vfp_load_reg32(vm, a->vm); + vfp_load_reg16(vn, a->vn); + vfp_load_reg16(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ - gen_helper_vfp_negh(vn, vn); + gen_vfp_negh(vn, vn); } - vfp_load_reg32(vd, a->vd); + vfp_load_reg16(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ - gen_helper_vfp_negh(vd, vd); + gen_vfp_negh(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst); @@ -2169,12 +2174,12 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d) vfp_load_reg32(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ - gen_helper_vfp_negs(vn, vn); + gen_vfp_negs(vn, vn); } vfp_load_reg32(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ - gen_helper_vfp_negs(vd, vd); + gen_vfp_negs(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladds(vd, vn, vm, vd, fpst); @@ -2234,12 +2239,12 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d) vfp_load_reg64(vm, a->vm); if (neg_n) { /* VFNMS, VFMS */ - gen_helper_vfp_negd(vn, vn); + gen_vfp_negd(vn, vn); } vfp_load_reg64(vd, a->vd); if (neg_d) { /* VFNMA, VFNMS */ - gen_helper_vfp_negd(vd, vd); + gen_vfp_negd(vd, vd); } fpst = fpstatus_ptr(FPST_FPCR); gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst); @@ -2409,13 +2414,13 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a) DO_VFP_VMOV(VMOV_reg, sp, tcg_gen_mov_i32) DO_VFP_VMOV(VMOV_reg, dp, tcg_gen_mov_i64) -DO_VFP_2OP(VABS, hp, gen_helper_vfp_absh, aa32_fp16_arith) -DO_VFP_2OP(VABS, sp, gen_helper_vfp_abss, aa32_fpsp_v2) -DO_VFP_2OP(VABS, dp, gen_helper_vfp_absd, aa32_fpdp_v2) +DO_VFP_2OP(VABS, hp, gen_vfp_absh, aa32_fp16_arith) +DO_VFP_2OP(VABS, sp, gen_vfp_abss, aa32_fpsp_v2) +DO_VFP_2OP(VABS, dp, gen_vfp_absd, aa32_fpdp_v2) -DO_VFP_2OP(VNEG, hp, gen_helper_vfp_negh, aa32_fp16_arith) -DO_VFP_2OP(VNEG, sp, gen_helper_vfp_negs, aa32_fpsp_v2) -DO_VFP_2OP(VNEG, dp, gen_helper_vfp_negd, aa32_fpdp_v2) +DO_VFP_2OP(VNEG, hp, gen_vfp_negh, aa32_fp16_arith) +DO_VFP_2OP(VNEG, sp, gen_vfp_negs, aa32_fpsp_v2) +DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2) static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm) { @@ -2456,11 +2461,11 @@ static bool trans_VCMP_hp(DisasContext *s, arg_VCMP_sp *a) vd = tcg_temp_new_i32(); vm = tcg_temp_new_i32(); - vfp_load_reg32(vd, a->vd); + vfp_load_reg16(vd, a->vd); if (a->z) { tcg_gen_movi_i32(vm, 0); } else { - vfp_load_reg32(vm, a->vm); + vfp_load_reg16(vm, a->vm); } if (a->e) { @@ -2700,7 +2705,7 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a) } tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vm); + vfp_load_reg16(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); @@ -2773,7 +2778,7 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a) } tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vm); + vfp_load_reg16(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst); gen_helper_rinth(tmp, tmp, fpst); @@ -2853,7 +2858,7 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a) } tmp = tcg_temp_new_i32(); - vfp_load_reg32(tmp, a->vm); + vfp_load_reg16(tmp, a->vm); fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_rinth_exact(tmp, tmp, fpst); vfp_store_reg32(tmp, a->vd); @@ -3270,7 +3275,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a) fpst = 
fpstatus_ptr(FPST_FPCR_F16); vm = tcg_temp_new_i32(); - vfp_load_reg32(vm, a->vm); + vfp_load_reg16(vm, a->vm); if (a->s) { if (a->rz) { @@ -3383,8 +3388,8 @@ static bool trans_VINS(DisasContext *s, arg_VINS *a) /* Insert low half of Vm into high half of Vd */ rm = tcg_temp_new_i32(); rd = tcg_temp_new_i32(); - vfp_load_reg32(rm, a->vm); - vfp_load_reg32(rd, a->vd); + vfp_load_reg16(rm, a->vm); + vfp_load_reg16(rd, a->vd); tcg_gen_deposit_i32(rd, rd, rm, 16, 16); vfp_store_reg32(rd, a->vd); return true; diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index dc49a8d806..c5bc691d92 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -23,7 +23,6 @@ #include "translate.h" #include "translate-a32.h" #include "qemu/log.h" -#include "disas/disas.h" #include "arm_ldst.h" #include "semihosting/semihost.h" #include "cpregs.h" @@ -2913,1594 +2912,6 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc) gen_rfe(s, pc, load_cpu_field(spsr)); } -static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, - uint32_t opr_sz, uint32_t max_sz, - gen_helper_gvec_3_ptr *fn) -{ - TCGv_ptr qc_ptr = tcg_temp_new_ptr(); - - tcg_gen_addi_ptr(qc_ptr, tcg_env, offsetof(CPUARMState, vfp.qc)); - tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr, - opr_sz, max_sz, 0, fn); -} - -void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static gen_helper_gvec_3_ptr * const fns[2] = { - gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32 - }; - tcg_debug_assert(vece >= 1 && vece <= 2); - gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); -} - -void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static gen_helper_gvec_3_ptr * const fns[2] = { - gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32 - }; - tcg_debug_assert(vece >= 1 && vece <= 2); - gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]); -} - -#define GEN_CMP0(NAME, COND) \ - void NAME(unsigned vece, uint32_t d, uint32_t m, \ - uint32_t opr_sz, uint32_t max_sz) \ - { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); } - -GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ) -GEN_CMP0(gen_gvec_cle0, TCG_COND_LE) -GEN_CMP0(gen_gvec_cge0, TCG_COND_GE) -GEN_CMP0(gen_gvec_clt0, TCG_COND_LT) -GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT) - -#undef GEN_CMP0 - -static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_sar16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_sari_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_sari_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_sari_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_ssra8_i64, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_b, - .load_dest = true, - 
.opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_ssra16_i64, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_ssra32_i32, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_s, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_ssra64_i64, - .fniv = gen_ssra_vec, - .fno = gen_helper_gvec_ssra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize]. */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* - * Shifts larger than the element size are architecturally valid. - * Signed results in all sign bits. - */ - shift = MIN(shift, (8 << vece) - 1); - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); -} - -static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr8i_i64(a, a, shift); - tcg_gen_vec_add8_i64(d, d, a); -} - -static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_vec_shr16i_i64(a, a, shift); - tcg_gen_vec_add16_i64(d, d, a); -} - -static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_add_i32(d, d, a); -} - -static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_add_i64(d, d, a); -} - -static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - tcg_gen_shri_vec(vece, a, a, sh); - tcg_gen_add_vec(vece, d, d, a); -} - -void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_usra8_i64, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8, }, - { .fni8 = gen_usra16_i64, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16, }, - { .fni4 = gen_usra32_i32, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_s, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32, }, - { .fni8 = gen_usra64_i64, - .fniv = gen_usra_vec, - .fno = gen_helper_gvec_usra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64, }, - }; - - /* tszimm encoding produces immediates in the range [1..esize]. */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* - * Shifts larger than the element size are architecturally valid. - * Unsigned results in all zeros as input to accumulate: nop. - */ - if (shift < (8 << vece)) { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } else { - /* Nop, but we do need to clear the tail. */ - tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); - } -} - -/* - * Shift one less than the requested amount, and the low bit is - * the rounding bit. For the 8 and 16-bit operations, because we - * mask the low bit, we can perform a normal integer shift instead - * of a vector shift. 
- */ -static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, sh - 1); - tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); - tcg_gen_vec_sar8i_i64(d, a, sh); - tcg_gen_vec_add8_i64(d, d, t); -} - -static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, sh - 1); - tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); - tcg_gen_vec_sar16i_i64(d, a, sh); - tcg_gen_vec_add16_i64(d, d, t); -} - -static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) -{ - TCGv_i32 t; - - /* Handle shift by the input size for the benefit of trans_SRSHR_ri */ - if (sh == 32) { - tcg_gen_movi_i32(d, 0); - return; - } - t = tcg_temp_new_i32(); - tcg_gen_extract_i32(t, a, sh - 1, 1); - tcg_gen_sari_i32(d, a, sh); - tcg_gen_add_i32(d, d, t); -} - -static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_extract_i64(t, a, sh - 1, 1); - tcg_gen_sari_i64(d, a, sh); - tcg_gen_add_i64(d, d, t); -} - -static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec ones = tcg_temp_new_vec_matching(d); - - tcg_gen_shri_vec(vece, t, a, sh - 1); - tcg_gen_dupi_vec(vece, ones, 1); - tcg_gen_and_vec(vece, t, t, ones); - tcg_gen_sari_vec(vece, d, a, sh); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_srshr8_i64, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_srshr16_i64, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_srshr32_i32, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_s, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_srshr64_i64, - .fniv = gen_srshr_vec, - .fno = gen_helper_gvec_srshr_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize] */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - if (shift == (8 << vece)) { - /* - * Shifts larger than the element size are architecturally valid. - * Signed results in all sign bits. With rounding, this produces - * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. - * I.e. always zero. 
- */ - tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0); - } else { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } -} - -static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - gen_srshr8_i64(t, a, sh); - tcg_gen_vec_add8_i64(d, d, t); -} - -static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - gen_srshr16_i64(t, a, sh); - tcg_gen_vec_add16_i64(d, d, t); -} - -static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - gen_srshr32_i32(t, a, sh); - tcg_gen_add_i32(d, d, t); -} - -static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - gen_srshr64_i64(t, a, sh); - tcg_gen_add_i64(d, d, t); -} - -static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - - gen_srshr_vec(vece, t, a, sh); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_srsra8_i64, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_b, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_8 }, - { .fni8 = gen_srsra16_i64, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_h, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_srsra32_i32, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_s, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_srsra64_i64, - .fniv = gen_srsra_vec, - .fno = gen_helper_gvec_srsra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize] */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* - * Shifts larger than the element size are architecturally valid. - * Signed results in all sign bits. With rounding, this produces - * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0. - * I.e. always zero. With accumulation, this leaves D unchanged. - */ - if (shift == (8 << vece)) { - /* Nop, but we do need to clear the tail. 
*/ - tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); - } else { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } -} - -static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, sh - 1); - tcg_gen_andi_i64(t, t, dup_const(MO_8, 1)); - tcg_gen_vec_shr8i_i64(d, a, sh); - tcg_gen_vec_add8_i64(d, d, t); -} - -static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, sh - 1); - tcg_gen_andi_i64(t, t, dup_const(MO_16, 1)); - tcg_gen_vec_shr16i_i64(d, a, sh); - tcg_gen_vec_add16_i64(d, d, t); -} - -static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) -{ - TCGv_i32 t; - - /* Handle shift by the input size for the benefit of trans_URSHR_ri */ - if (sh == 32) { - tcg_gen_extract_i32(d, a, sh - 1, 1); - return; - } - t = tcg_temp_new_i32(); - tcg_gen_extract_i32(t, a, sh - 1, 1); - tcg_gen_shri_i32(d, a, sh); - tcg_gen_add_i32(d, d, t); -} - -static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_extract_i64(t, a, sh - 1, 1); - tcg_gen_shri_i64(d, a, sh); - tcg_gen_add_i64(d, d, t); -} - -static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec ones = tcg_temp_new_vec_matching(d); - - tcg_gen_shri_vec(vece, t, a, shift - 1); - tcg_gen_dupi_vec(vece, ones, 1); - tcg_gen_and_vec(vece, t, t, ones); - tcg_gen_shri_vec(vece, d, a, shift); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_urshr8_i64, - .fniv = gen_urshr_vec, - .fno = gen_helper_gvec_urshr_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_urshr16_i64, - .fniv = gen_urshr_vec, - .fno = gen_helper_gvec_urshr_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_urshr32_i32, - .fniv = gen_urshr_vec, - .fno = gen_helper_gvec_urshr_s, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_urshr64_i64, - .fniv = gen_urshr_vec, - .fno = gen_helper_gvec_urshr_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize] */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - if (shift == (8 << vece)) { - /* - * Shifts larger than the element size are architecturally valid. - * Unsigned results in zero. With rounding, this produces a - * copy of the most significant bit. 
- */ - tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz); - } else { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } -} - -static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - if (sh == 8) { - tcg_gen_vec_shr8i_i64(t, a, 7); - } else { - gen_urshr8_i64(t, a, sh); - } - tcg_gen_vec_add8_i64(d, d, t); -} - -static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - if (sh == 16) { - tcg_gen_vec_shr16i_i64(t, a, 15); - } else { - gen_urshr16_i64(t, a, sh); - } - tcg_gen_vec_add16_i64(d, d, t); -} - -static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - if (sh == 32) { - tcg_gen_shri_i32(t, a, 31); - } else { - gen_urshr32_i32(t, a, sh); - } - tcg_gen_add_i32(d, d, t); -} - -static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - if (sh == 64) { - tcg_gen_shri_i64(t, a, 63); - } else { - gen_urshr64_i64(t, a, sh); - } - tcg_gen_add_i64(d, d, t); -} - -static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - - if (sh == (8 << vece)) { - tcg_gen_shri_vec(vece, t, a, sh - 1); - } else { - gen_urshr_vec(vece, t, a, sh); - } - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_shri_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen2i ops[4] = { - { .fni8 = gen_ursra8_i64, - .fniv = gen_ursra_vec, - .fno = gen_helper_gvec_ursra_b, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_8 }, - { .fni8 = gen_ursra16_i64, - .fniv = gen_ursra_vec, - .fno = gen_helper_gvec_ursra_h, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_ursra32_i32, - .fniv = gen_ursra_vec, - .fno = gen_helper_gvec_ursra_s, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_ursra64_i64, - .fniv = gen_ursra_vec, - .fno = gen_helper_gvec_ursra_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize] */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); -} - -static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff >> shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shri_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_shri_i32(a, a, shift); - tcg_gen_deposit_i32(d, d, a, 0, 32 - shift); -} - -static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_shri_i64(a, a, shift); - tcg_gen_deposit_i64(d, d, a, 0, 64 - shift); -} - -static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = 
tcg_temp_new_vec_matching(d); - - tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh)); - tcg_gen_shri_vec(vece, t, a, sh); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); -} - -void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 }; - const GVecGen2i ops[4] = { - { .fni8 = gen_shr8_ins_i64, - .fniv = gen_shr_ins_vec, - .fno = gen_helper_gvec_sri_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_shr16_ins_i64, - .fniv = gen_shr_ins_vec, - .fno = gen_helper_gvec_sri_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_shr32_ins_i32, - .fniv = gen_shr_ins_vec, - .fno = gen_helper_gvec_sri_s, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_shr64_ins_i64, - .fniv = gen_shr_ins_vec, - .fno = gen_helper_gvec_sri_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [1..esize]. */ - tcg_debug_assert(shift > 0); - tcg_debug_assert(shift <= (8 << vece)); - - /* Shift of esize leaves destination unchanged. */ - if (shift < (8 << vece)) { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } else { - /* Nop, but we do need to clear the tail. */ - tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz); - } -} - -static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_8, 0xff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - uint64_t mask = dup_const(MO_16, 0xffff << shift); - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_shli_i64(t, a, shift); - tcg_gen_andi_i64(t, t, mask); - tcg_gen_andi_i64(d, d, ~mask); - tcg_gen_or_i64(d, d, t); -} - -static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift) -{ - tcg_gen_deposit_i32(d, d, a, shift, 32 - shift); -} - -static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift) -{ - tcg_gen_deposit_i64(d, d, a, shift, 64 - shift); -} - -static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - TCGv_vec m = tcg_temp_new_vec_matching(d); - - tcg_gen_shli_vec(vece, t, a, sh); - tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh)); - tcg_gen_and_vec(vece, d, d, m); - tcg_gen_or_vec(vece, d, d, t); -} - -void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, - int64_t shift, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; - const GVecGen2i ops[4] = { - { .fni8 = gen_shl8_ins_i64, - .fniv = gen_shl_ins_vec, - .fno = gen_helper_gvec_sli_b, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni8 = gen_shl16_ins_i64, - .fniv = gen_shl_ins_vec, - .fno = gen_helper_gvec_sli_h, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_shl32_ins_i32, - .fniv = gen_shl_ins_vec, - .fno = gen_helper_gvec_sli_s, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_shl64_ins_i64, - .fniv = gen_shl_ins_vec, - .fno = gen_helper_gvec_sli_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list, - 
.vece = MO_64 }, - }; - - /* tszimm encoding produces immediates in the range [0..esize-1]. */ - tcg_debug_assert(shift >= 0); - tcg_debug_assert(shift < (8 << vece)); - - if (shift == 0) { - tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz); - } else { - tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]); - } -} - -static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_add_u8(d, d, a); -} - -static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u8(a, a, b); - gen_helper_neon_sub_u8(d, d, a); -} - -static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_add_u16(d, d, a); -} - -static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - gen_helper_neon_mul_u16(a, a, b); - gen_helper_neon_sub_u16(d, d, a); -} - -static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_add_i32(d, d, a); -} - -static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_mul_i32(a, a, b); - tcg_gen_sub_i32(d, d, a); -} - -static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_add_i64(d, d, a); -} - -static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_mul_i64(a, a, b); - tcg_gen_sub_i64(d, d, a); -} - -static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_add_vec(vece, d, d, a); -} - -static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_mul_vec(vece, a, a, b); - tcg_gen_sub_vec(vece, d, d, a); -} - -/* Note that while NEON does not support VMLA and VMLS as 64-bit ops, - * these tables are shared with AArch64 which does support them. - */ -void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_mul_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fni4 = gen_mla8_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni4 = gen_mla16_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_mla32_i32, - .fniv = gen_mla_vec, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_mla64_i64, - .fniv = gen_mla_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_mul_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fni4 = gen_mls8_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni4 = gen_mls16_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_mls32_i32, - .fniv = gen_mls_vec, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_mls64_i64, - .fniv = gen_mls_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .load_dest = true, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -/* CMTST : test is "if (X & Y != 0)". 
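The CMTST helpers that follow produce an all-ones element wherever the AND of the two operands is non-zero, i.e. a lane mask rather than a 0/1 boolean. A scalar sketch of the intended result (hypothetical name, for illustration only):

    /* Sketch only: CMTST on one 64-bit lane. */
    static uint64_t ref_cmtst64(uint64_t a, uint64_t b)
    {
        return (a & b) != 0 ? ~(uint64_t)0 : 0;
    }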
*/ -static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - tcg_gen_and_i32(d, a, b); - tcg_gen_negsetcond_i32(TCG_COND_NE, d, d, tcg_constant_i32(0)); -} - -void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - tcg_gen_and_i64(d, a, b); - tcg_gen_negsetcond_i64(TCG_COND_NE, d, d, tcg_constant_i64(0)); -} - -static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - tcg_gen_and_vec(vece, d, a, b); - tcg_gen_dupi_vec(vece, a, 0); - tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a); -} - -void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 }; - static const GVecGen3 ops[4] = { - { .fni4 = gen_helper_neon_tst_u8, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fni4 = gen_helper_neon_tst_u16, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_cmtst_i32, - .fniv = gen_cmtst_vec, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_cmtst_i64, - .fniv = gen_cmtst_vec, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) -{ - TCGv_i32 lval = tcg_temp_new_i32(); - TCGv_i32 rval = tcg_temp_new_i32(); - TCGv_i32 lsh = tcg_temp_new_i32(); - TCGv_i32 rsh = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_constant_i32(0); - TCGv_i32 max = tcg_constant_i32(32); - - /* - * Rely on the TCG guarantee that out of range shifts produce - * unspecified results, not undefined behaviour (i.e. no trap). - * Discard out-of-range results after the fact. - */ - tcg_gen_ext8s_i32(lsh, shift); - tcg_gen_neg_i32(rsh, lsh); - tcg_gen_shl_i32(lval, src, lsh); - tcg_gen_shr_i32(rval, src, rsh); - tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero); - tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst); -} - -void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) -{ - TCGv_i64 lval = tcg_temp_new_i64(); - TCGv_i64 rval = tcg_temp_new_i64(); - TCGv_i64 lsh = tcg_temp_new_i64(); - TCGv_i64 rsh = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); - TCGv_i64 max = tcg_constant_i64(64); - - /* - * Rely on the TCG guarantee that out of range shifts produce - * unspecified results, not undefined behaviour (i.e. no trap). - * Discard out-of-range results after the fact. - */ - tcg_gen_ext8s_i64(lsh, shift); - tcg_gen_neg_i64(rsh, lsh); - tcg_gen_shl_i64(lval, src, lsh); - tcg_gen_shr_i64(rval, src, rsh); - tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero); - tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst); -} - -static void gen_ushl_vec(unsigned vece, TCGv_vec dst, - TCGv_vec src, TCGv_vec shift) -{ - TCGv_vec lval = tcg_temp_new_vec_matching(dst); - TCGv_vec rval = tcg_temp_new_vec_matching(dst); - TCGv_vec lsh = tcg_temp_new_vec_matching(dst); - TCGv_vec rsh = tcg_temp_new_vec_matching(dst); - TCGv_vec msk, max; - - tcg_gen_neg_vec(vece, rsh, shift); - if (vece == MO_8) { - tcg_gen_mov_vec(lsh, shift); - } else { - msk = tcg_temp_new_vec_matching(dst); - tcg_gen_dupi_vec(vece, msk, 0xff); - tcg_gen_and_vec(vece, lsh, shift, msk); - tcg_gen_and_vec(vece, rsh, rsh, msk); - } - - /* - * Rely on the TCG guarantee that out of range shifts produce - * unspecified results, not undefined behaviour (i.e. no trap). - * Discard out-of-range results after the fact. 
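For the USHL helpers in this hunk, the shift count is the signed low byte of the second operand: non-negative counts shift left, negative counts shift right, and any count whose magnitude reaches the element size yields zero, which is why the generated code computes both directions and then discards the out-of-range result. A scalar reference for one 64-bit lane (hypothetical name, not from the patch):

    /* Sketch only: one 64-bit lane of USHL, assuming the count is the
     * signed low byte of the shift operand as in gen_ushl_i64. */
    static uint64_t ref_ushl64(uint64_t src, int8_t count)
    {
        if (count >= 64 || count <= -64) {
            return 0;                        /* everything shifted out */
        }
        return count >= 0 ? src << count : src >> -count;
    }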
- */ - tcg_gen_shlv_vec(vece, lval, src, lsh); - tcg_gen_shrv_vec(vece, rval, src, rsh); - - max = tcg_temp_new_vec_matching(dst); - tcg_gen_dupi_vec(vece, max, 8 << vece); - - /* - * The choice of LT (signed) and GEU (unsigned) are biased toward - * the instructions of the x86_64 host. For MO_8, the whole byte - * is significant so we must use an unsigned compare; otherwise we - * have already masked to a byte and so a signed compare works. - * Other tcg hosts have a full set of comparisons and do not care. - */ - if (vece == MO_8) { - tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max); - tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max); - tcg_gen_andc_vec(vece, lval, lval, lsh); - tcg_gen_andc_vec(vece, rval, rval, rsh); - } else { - tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max); - tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max); - tcg_gen_and_vec(vece, lval, lval, lsh); - tcg_gen_and_vec(vece, rval, rval, rsh); - } - tcg_gen_or_vec(vece, dst, lval, rval); -} - -void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_neg_vec, INDEX_op_shlv_vec, - INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fniv = gen_ushl_vec, - .fno = gen_helper_gvec_ushl_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_ushl_i32, - .fniv = gen_ushl_vec, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_ushl_i64, - .fniv = gen_ushl_vec, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift) -{ - TCGv_i32 lval = tcg_temp_new_i32(); - TCGv_i32 rval = tcg_temp_new_i32(); - TCGv_i32 lsh = tcg_temp_new_i32(); - TCGv_i32 rsh = tcg_temp_new_i32(); - TCGv_i32 zero = tcg_constant_i32(0); - TCGv_i32 max = tcg_constant_i32(31); - - /* - * Rely on the TCG guarantee that out of range shifts produce - * unspecified results, not undefined behaviour (i.e. no trap). - * Discard out-of-range results after the fact. - */ - tcg_gen_ext8s_i32(lsh, shift); - tcg_gen_neg_i32(rsh, lsh); - tcg_gen_shl_i32(lval, src, lsh); - tcg_gen_umin_i32(rsh, rsh, max); - tcg_gen_sar_i32(rval, src, rsh); - tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero); - tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval); -} - -void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift) -{ - TCGv_i64 lval = tcg_temp_new_i64(); - TCGv_i64 rval = tcg_temp_new_i64(); - TCGv_i64 lsh = tcg_temp_new_i64(); - TCGv_i64 rsh = tcg_temp_new_i64(); - TCGv_i64 zero = tcg_constant_i64(0); - TCGv_i64 max = tcg_constant_i64(63); - - /* - * Rely on the TCG guarantee that out of range shifts produce - * unspecified results, not undefined behaviour (i.e. no trap). - * Discard out-of-range results after the fact. 
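SSHL, handled next, differs only for negative counts: the right shift is arithmetic and the count is clamped to element_size - 1, so an out-of-range negative count produces the sign fill (0 or -1) rather than zero. A scalar sketch, assuming an arithmetic right shift on int64_t (hypothetical name, not from the patch):

    /* Sketch only: one 64-bit lane of SSHL. */
    static int64_t ref_sshl64(int64_t src, int8_t count)
    {
        if (count >= 64) {
            return 0;                                    /* whole lane shifted out */
        }
        if (count >= 0) {
            return (int64_t)((uint64_t)src << count);    /* avoid signed-overflow UB */
        }
        int rsh = -count;
        return src >> (rsh > 63 ? 63 : rsh);             /* clamp mirrors tcg_gen_umin_i64 */
    }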
- */ - tcg_gen_ext8s_i64(lsh, shift); - tcg_gen_neg_i64(rsh, lsh); - tcg_gen_shl_i64(lval, src, lsh); - tcg_gen_umin_i64(rsh, rsh, max); - tcg_gen_sar_i64(rval, src, rsh); - tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero); - tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval); -} - -static void gen_sshl_vec(unsigned vece, TCGv_vec dst, - TCGv_vec src, TCGv_vec shift) -{ - TCGv_vec lval = tcg_temp_new_vec_matching(dst); - TCGv_vec rval = tcg_temp_new_vec_matching(dst); - TCGv_vec lsh = tcg_temp_new_vec_matching(dst); - TCGv_vec rsh = tcg_temp_new_vec_matching(dst); - TCGv_vec tmp = tcg_temp_new_vec_matching(dst); - - /* - * Rely on the TCG guarantee that out of range shifts produce - * unspecified results, not undefined behaviour (i.e. no trap). - * Discard out-of-range results after the fact. - */ - tcg_gen_neg_vec(vece, rsh, shift); - if (vece == MO_8) { - tcg_gen_mov_vec(lsh, shift); - } else { - tcg_gen_dupi_vec(vece, tmp, 0xff); - tcg_gen_and_vec(vece, lsh, shift, tmp); - tcg_gen_and_vec(vece, rsh, rsh, tmp); - } - - /* Bound rsh so out of bound right shift gets -1. */ - tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1); - tcg_gen_umin_vec(vece, rsh, rsh, tmp); - tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp); - - tcg_gen_shlv_vec(vece, lval, src, lsh); - tcg_gen_sarv_vec(vece, rval, src, rsh); - - /* Select in-bound left shift. */ - tcg_gen_andc_vec(vece, lval, lval, tmp); - - /* Select between left and right shift. */ - if (vece == MO_8) { - tcg_gen_dupi_vec(vece, tmp, 0); - tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval); - } else { - tcg_gen_dupi_vec(vece, tmp, 0x80); - tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval); - } -} - -void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec, - INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fniv = gen_sshl_vec, - .fno = gen_helper_gvec_sshl_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_sshl_i32, - .fniv = gen_sshl_vec, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_sshl_i64, - .fniv = gen_sshl_vec, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, - TCGv_vec a, TCGv_vec b) -{ - TCGv_vec x = tcg_temp_new_vec_matching(t); - tcg_gen_add_vec(vece, x, a, b); - tcg_gen_usadd_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); -} - -void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen4 ops[4] = { - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_b, - .write_aofs = true, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_h, - .write_aofs = true, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_s, - .write_aofs = true, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fniv = gen_uqadd_vec, - .fno = gen_helper_gvec_uqadd_d, - .write_aofs = true, - 
.opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, - TCGv_vec a, TCGv_vec b) -{ - TCGv_vec x = tcg_temp_new_vec_matching(t); - tcg_gen_add_vec(vece, x, a, b); - tcg_gen_ssadd_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); -} - -void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0 - }; - static const GVecGen4 ops[4] = { - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_b, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_h, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_s, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqadd_vec, - .fno = gen_helper_gvec_sqadd_d, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_64 }, - }; - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, - TCGv_vec a, TCGv_vec b) -{ - TCGv_vec x = tcg_temp_new_vec_matching(t); - tcg_gen_sub_vec(vece, x, a, b); - tcg_gen_ussub_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); -} - -void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen4 ops[4] = { - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_b, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_h, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_s, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_uqsub_vec, - .fno = gen_helper_gvec_uqsub_d, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_64 }, - }; - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat, - TCGv_vec a, TCGv_vec b) -{ - TCGv_vec x = tcg_temp_new_vec_matching(t); - tcg_gen_sub_vec(vece, x, a, b); - tcg_gen_sssub_vec(vece, t, a, b); - tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t); - tcg_gen_or_vec(vece, sat, sat, x); -} - -void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0 - }; - static const GVecGen4 ops[4] = { - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_b, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_8 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_h, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_16 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_s, - .opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_32 }, - { .fniv = gen_sqsub_vec, - .fno = gen_helper_gvec_sqsub_d, - 
.opt_opc = vecop_list, - .write_aofs = true, - .vece = MO_64 }, - }; - tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc), - rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - tcg_gen_sub_i32(t, a, b); - tcg_gen_sub_i32(d, b, a); - tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t); -} - -static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_sub_i64(t, a, b); - tcg_gen_sub_i64(d, b, a); - tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t); -} - -static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - - tcg_gen_smin_vec(vece, t, a, b); - tcg_gen_smax_vec(vece, d, a, b); - tcg_gen_sub_vec(vece, d, d, t); -} - -void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fniv = gen_sabd_vec, - .fno = gen_helper_gvec_sabd_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fniv = gen_sabd_vec, - .fno = gen_helper_gvec_sabd_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_sabd_i32, - .fniv = gen_sabd_vec, - .fno = gen_helper_gvec_sabd_s, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_sabd_i64, - .fniv = gen_sabd_vec, - .fno = gen_helper_gvec_sabd_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - tcg_gen_sub_i32(t, a, b); - tcg_gen_sub_i32(d, b, a); - tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t); -} - -static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t = tcg_temp_new_i64(); - - tcg_gen_sub_i64(t, a, b); - tcg_gen_sub_i64(d, b, a); - tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t); -} - -static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - - tcg_gen_umin_vec(vece, t, a, b); - tcg_gen_umax_vec(vece, d, a, b); - tcg_gen_sub_vec(vece, d, d, t); -} - -void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fniv = gen_uabd_vec, - .fno = gen_helper_gvec_uabd_b, - .opt_opc = vecop_list, - .vece = MO_8 }, - { .fniv = gen_uabd_vec, - .fno = gen_helper_gvec_uabd_h, - .opt_opc = vecop_list, - .vece = MO_16 }, - { .fni4 = gen_uabd_i32, - .fniv = gen_uabd_vec, - .fno = gen_helper_gvec_uabd_s, - .opt_opc = vecop_list, - .vece = MO_32 }, - { .fni8 = gen_uabd_i64, - .fniv = gen_uabd_vec, - .fno = gen_helper_gvec_uabd_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t = tcg_temp_new_i32(); - gen_sabd_i32(t, a, b); - tcg_gen_add_i32(d, d, t); -} - -static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t = tcg_temp_new_i64(); - gen_sabd_i64(t, a, b); - tcg_gen_add_i64(d, d, t); -} - -static void 
gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - gen_sabd_vec(vece, t, a, b); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sub_vec, INDEX_op_add_vec, - INDEX_op_smin_vec, INDEX_op_smax_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fniv = gen_saba_vec, - .fno = gen_helper_gvec_saba_b, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_8 }, - { .fniv = gen_saba_vec, - .fno = gen_helper_gvec_saba_h, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_saba_i32, - .fniv = gen_saba_vec, - .fno = gen_helper_gvec_saba_s, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_saba_i64, - .fniv = gen_saba_vec, - .fno = gen_helper_gvec_saba_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - -static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b) -{ - TCGv_i32 t = tcg_temp_new_i32(); - gen_uabd_i32(t, a, b); - tcg_gen_add_i32(d, d, t); -} - -static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b) -{ - TCGv_i64 t = tcg_temp_new_i64(); - gen_uabd_i64(t, a, b); - tcg_gen_add_i64(d, d, t); -} - -static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b) -{ - TCGv_vec t = tcg_temp_new_vec_matching(d); - gen_uabd_vec(vece, t, a, b); - tcg_gen_add_vec(vece, d, d, t); -} - -void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, - uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz) -{ - static const TCGOpcode vecop_list[] = { - INDEX_op_sub_vec, INDEX_op_add_vec, - INDEX_op_umin_vec, INDEX_op_umax_vec, 0 - }; - static const GVecGen3 ops[4] = { - { .fniv = gen_uaba_vec, - .fno = gen_helper_gvec_uaba_b, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_8 }, - { .fniv = gen_uaba_vec, - .fno = gen_helper_gvec_uaba_h, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_16 }, - { .fni4 = gen_uaba_i32, - .fniv = gen_uaba_vec, - .fno = gen_helper_gvec_uaba_s, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_32 }, - { .fni8 = gen_uaba_i64, - .fniv = gen_uaba_vec, - .fno = gen_helper_gvec_uaba_d, - .prefer_i64 = TCG_TARGET_REG_BITS == 64, - .opt_opc = vecop_list, - .load_dest = true, - .vece = MO_64 }, - }; - tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]); -} - static bool aa32_cpreg_encoding_in_impdef_space(uint8_t crn, uint8_t crm) { static const uint16_t mask[3] = { @@ -8766,12 +7177,12 @@ static bool trans_PLD(DisasContext *s, arg_PLD *a) return ENABLE_ARCH_5TE; } -static bool trans_PLDW(DisasContext *s, arg_PLD *a) +static bool trans_PLDW(DisasContext *s, arg_PLDW *a) { return arm_dc_feature(s, ARM_FEATURE_V7MP); } -static bool trans_PLI(DisasContext *s, arg_PLD *a) +static bool trans_PLI(DisasContext *s, arg_PLI *a) { return ENABLE_ARCH_7; } @@ -9663,22 +8074,12 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void arm_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - DisasContext *dc = container_of(dcbase, DisasContext, base); - - fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); - target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); -} - static const TranslatorOps arm_translator_ops = { 
.init_disas_context = arm_tr_init_disas_context, .tb_start = arm_tr_tb_start, .insn_start = arm_tr_insn_start, .translate_insn = arm_tr_translate_insn, .tb_stop = arm_tr_tb_stop, - .disas_log = arm_tr_disas_log, }; static const TranslatorOps thumb_translator_ops = { @@ -9687,7 +8088,6 @@ static const TranslatorOps thumb_translator_ops = { .insn_start = arm_tr_insn_start, .translate_insn = thumb_tr_translate_insn, .tb_stop = arm_tr_tb_stop, - .disas_log = arm_tr_disas_log, }; /* generate intermediate code for basic block 'tb'. */ diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index dc66ff2190..aba21f730f 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -252,6 +252,11 @@ static inline int shl_12(DisasContext *s, int x) return x << 12; } +static inline int xor_2(DisasContext *s, int x) +{ + return x ^ 2; +} + static inline int neon_3same_fp_size(DisasContext *s, int x) { /* Convert 0==fp32, 1==fp16 into a MO_* value */ @@ -401,6 +406,36 @@ static inline void gen_swstep_exception(DisasContext *s, int isv, int ex) */ uint64_t vfp_expand_imm(int size, uint8_t imm8); +static inline void gen_vfp_absh(TCGv_i32 d, TCGv_i32 s) +{ + tcg_gen_andi_i32(d, s, INT16_MAX); +} + +static inline void gen_vfp_abss(TCGv_i32 d, TCGv_i32 s) +{ + tcg_gen_andi_i32(d, s, INT32_MAX); +} + +static inline void gen_vfp_absd(TCGv_i64 d, TCGv_i64 s) +{ + tcg_gen_andi_i64(d, s, INT64_MAX); +} + +static inline void gen_vfp_negh(TCGv_i32 d, TCGv_i32 s) +{ + tcg_gen_xori_i32(d, s, 1u << 15); +} + +static inline void gen_vfp_negs(TCGv_i32 d, TCGv_i32 s) +{ + tcg_gen_xori_i32(d, s, 1u << 31); +} + +static inline void gen_vfp_negd(TCGv_i64 d, TCGv_i64 s) +{ + tcg_gen_xori_i64(d, s, 1ull << 63); +} + /* Vector operations shared between ARM and AArch64. 
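The gen_vfp_abs*/gen_vfp_neg* inlines added in this header operate directly on the IEEE-754 sign bit, so no softfloat call or exception handling is involved; this matches FABS/FNEG, which never raise exceptions even for NaN inputs. The same idea on raw binary32 payloads (hypothetical helpers, shown only to spell out the constants):

    /* Sketch only: abs clears the sign bit, neg flips it. */
    static uint32_t f32_abs_bits(uint32_t bits) { return bits & INT32_MAX; }
    static uint32_t f32_neg_bits(uint32_t bits) { return bits ^ (1u << 31); }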
*/ void gen_gvec_ceq0(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); @@ -424,6 +459,31 @@ void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_sqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_uqshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_sqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_neon_uqrshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_gvec_shadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_shsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uhsub(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_srhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_urhadd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_ushl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); @@ -431,12 +491,27 @@ void gen_sshl_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b); void gen_ushl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); void gen_sshl_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b); +void gen_uqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_uqadd_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_sqadd_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_sqadd_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_uqsub_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_uqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + +void gen_sqsub_bhs(TCGv_i64 res, TCGv_i64 qc, + TCGv_i64 a, TCGv_i64 b, MemOp esz); +void gen_sqsub_d(TCGv_i64 d, TCGv_i64 q, TCGv_i64 a, TCGv_i64 b); void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); @@ -445,6 +520,11 @@ void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh); +void 
gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh); +void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh); +void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh); + void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, @@ -459,6 +539,10 @@ void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, int64_t shift, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sqrdmulh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, @@ -474,6 +558,17 @@ void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_addp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_smaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_sminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_umaxp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +void gen_gvec_uminp(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, + uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); + /* * Forward to the isar_feature_* tests given a DisasContext pointer. 
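Among the new declarations above are the NEON halving adds (gen_gvec_shadd/uhadd and the rounding gen_gvec_srhadd/urhadd). The halved sum can be formed without widening via the usual carry-save identity; whether the generated TCG uses exactly this form is not visible in this header, so the sketch below is illustrative only (hypothetical names):

    /* Sketch only: floor((a+b)/2) and floor((a+b+1)/2) with no overflow. */
    static uint32_t ref_uhadd32(uint32_t a, uint32_t b)
    {
        return (a & b) + ((a ^ b) >> 1);
    }
    static uint32_t ref_urhadd32(uint32_t a, uint32_t b)
    {
        return (a | b) - ((a ^ b) >> 1);
    }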
*/ diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 1f93510b85..b05922b425 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -311,6 +311,38 @@ void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + + for (i = 0; i < opr_sz / 2; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < 16 / 2; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx); + + for (i = 0; i < opr_sz / 2; i += 16 / 2) { + int16_t mm = m[i]; + for (j = 0; j < 16 / 2; ++j) { + d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -474,6 +506,38 @@ void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm, clear_tail(d, opr_sz, simd_maxsz(desc)); } +void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + + for (i = 0; i < opr_sz / 4; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < 16 / 4; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + +void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm, + void *vq, uint32_t desc) +{ + intptr_t i, j, opr_sz = simd_oprsz(desc); + int idx = simd_data(desc); + int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx); + + for (i = 0; i < opr_sz / 4; i += 16 / 4) { + int32_t mm = m[i]; + for (j = 0; j < 16 / 4; ++j) { + d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq); + } + } + clear_tail(d, opr_sz, simd_maxsz(desc)); +} + void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm, void *va, uint32_t desc) { @@ -971,6 +1035,11 @@ static uint32_t float32_ceq(float32 op1, float32 op2, float_status *stat) return -float32_eq_quiet(op1, op2, stat); } +static uint64_t float64_ceq(float64 op1, float64 op2, float_status *stat) +{ + return -float64_eq_quiet(op1, op2, stat); +} + static uint16_t float16_cge(float16 op1, float16 op2, float_status *stat) { return -float16_le(op2, op1, stat); @@ -981,6 +1050,11 @@ static uint32_t float32_cge(float32 op1, float32 op2, float_status *stat) return -float32_le(op2, op1, stat); } +static uint64_t float64_cge(float64 op1, float64 op2, float_status *stat) +{ + return -float64_le(op2, op1, stat); +} + static uint16_t float16_cgt(float16 op1, float16 op2, float_status *stat) { return -float16_lt(op2, op1, stat); @@ -991,6 +1065,11 @@ static uint32_t float32_cgt(float32 op1, float32 op2, float_status *stat) return -float32_lt(op2, op1, stat); } +static uint64_t float64_cgt(float64 op1, float64 op2, float_status *stat) +{ + return -float64_lt(op2, op1, stat); +} + static uint16_t float16_acge(float16 op1, float16 op2, float_status *stat) { return -float16_le(float16_abs(op2), float16_abs(op1), stat); @@ -1001,6 +1080,11 @@ static uint32_t 
float32_acge(float32 op1, float32 op2, float_status *stat) return -float32_le(float32_abs(op2), float32_abs(op1), stat); } +static uint64_t float64_acge(float64 op1, float64 op2, float_status *stat) +{ + return -float64_le(float64_abs(op2), float64_abs(op1), stat); +} + static uint16_t float16_acgt(float16 op1, float16 op2, float_status *stat) { return -float16_lt(float16_abs(op2), float16_abs(op1), stat); @@ -1011,6 +1095,11 @@ static uint32_t float32_acgt(float32 op1, float32 op2, float_status *stat) return -float32_lt(float32_abs(op2), float32_abs(op1), stat); } +static uint64_t float64_acgt(float64 op1, float64 op2, float_status *stat) +{ + return -float64_lt(float64_abs(op2), float64_abs(op1), stat); +} + static int16_t vfp_tosszh(float16 x, void *fpstp) { float_status *fpst = fpstp; @@ -1129,6 +1218,11 @@ static float32 float32_abd(float32 op1, float32 op2, float_status *stat) return float32_abs(float32_sub(op1, op2, stat)); } +static float64 float64_abd(float64 op1, float64 op2, float_status *stat) +{ + return float64_abs(float64_sub(op1, op2, stat)); +} + /* * Reciprocal step. These are the AArch32 version which uses a * non-fused multiply-and-subtract. @@ -1213,33 +1307,43 @@ DO_3OP(gvec_ftsmul_d, float64_ftsmul, float64) DO_3OP(gvec_fabd_h, float16_abd, float16) DO_3OP(gvec_fabd_s, float32_abd, float32) +DO_3OP(gvec_fabd_d, float64_abd, float64) DO_3OP(gvec_fceq_h, float16_ceq, float16) DO_3OP(gvec_fceq_s, float32_ceq, float32) +DO_3OP(gvec_fceq_d, float64_ceq, float64) DO_3OP(gvec_fcge_h, float16_cge, float16) DO_3OP(gvec_fcge_s, float32_cge, float32) +DO_3OP(gvec_fcge_d, float64_cge, float64) DO_3OP(gvec_fcgt_h, float16_cgt, float16) DO_3OP(gvec_fcgt_s, float32_cgt, float32) +DO_3OP(gvec_fcgt_d, float64_cgt, float64) DO_3OP(gvec_facge_h, float16_acge, float16) DO_3OP(gvec_facge_s, float32_acge, float32) +DO_3OP(gvec_facge_d, float64_acge, float64) DO_3OP(gvec_facgt_h, float16_acgt, float16) DO_3OP(gvec_facgt_s, float32_acgt, float32) +DO_3OP(gvec_facgt_d, float64_acgt, float64) DO_3OP(gvec_fmax_h, float16_max, float16) DO_3OP(gvec_fmax_s, float32_max, float32) +DO_3OP(gvec_fmax_d, float64_max, float64) DO_3OP(gvec_fmin_h, float16_min, float16) DO_3OP(gvec_fmin_s, float32_min, float32) +DO_3OP(gvec_fmin_d, float64_min, float64) DO_3OP(gvec_fmaxnum_h, float16_maxnum, float16) DO_3OP(gvec_fmaxnum_s, float32_maxnum, float32) +DO_3OP(gvec_fmaxnum_d, float64_maxnum, float64) DO_3OP(gvec_fminnum_h, float16_minnum, float16) DO_3OP(gvec_fminnum_s, float32_minnum, float32) +DO_3OP(gvec_fminnum_d, float64_minnum, float64) DO_3OP(gvec_recps_nf_h, float16_recps_nf, float16) DO_3OP(gvec_recps_nf_s, float32_recps_nf, float32) @@ -1248,6 +1352,13 @@ DO_3OP(gvec_rsqrts_nf_h, float16_rsqrts_nf, float16) DO_3OP(gvec_rsqrts_nf_s, float32_rsqrts_nf, float32) #ifdef TARGET_AARCH64 +DO_3OP(gvec_fdiv_h, float16_div, float16) +DO_3OP(gvec_fdiv_s, float32_div, float32) +DO_3OP(gvec_fdiv_d, float64_div, float64) + +DO_3OP(gvec_fmulx_h, helper_advsimd_mulxh, float16) +DO_3OP(gvec_fmulx_s, helper_vfp_mulxs, float32) +DO_3OP(gvec_fmulx_d, helper_vfp_mulxd, float64) DO_3OP(gvec_recps_h, helper_recpsf_f16, float16) DO_3OP(gvec_recps_s, helper_recpsf_f32, float32) @@ -1298,6 +1409,12 @@ static float32 float32_muladd_f(float32 dest, float32 op1, float32 op2, return float32_muladd(op1, op2, dest, 0, stat); } +static float64 float64_muladd_f(float64 dest, float64 op1, float64 op2, + float_status *stat) +{ + return float64_muladd(op1, op2, dest, 0, stat); +} + static float16 float16_mulsub_f(float16 dest, float16 
op1, float16 op2, float_status *stat) { @@ -1310,6 +1427,12 @@ static float32 float32_mulsub_f(float32 dest, float32 op1, float32 op2, return float32_muladd(float32_chs(op1), op2, dest, 0, stat); } +static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2, + float_status *stat) +{ + return float64_muladd(float64_chs(op1), op2, dest, 0, stat); +} + #define DO_MULADD(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ @@ -1329,9 +1452,11 @@ DO_MULADD(gvec_fmls_s, float32_mulsub_nf, float32) DO_MULADD(gvec_vfma_h, float16_muladd_f, float16) DO_MULADD(gvec_vfma_s, float32_muladd_f, float32) +DO_MULADD(gvec_vfma_d, float64_muladd_f, float64) DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16) DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32) +DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64) /* For the indexed ops, SVE applies the index per 128-bit vector segment. * For AdvSIMD, there is of course only one such vector segment. @@ -1385,7 +1510,7 @@ DO_MLA_IDX(gvec_mls_idx_d, uint64_t, -, H8) #undef DO_MLA_IDX -#define DO_FMUL_IDX(NAME, ADD, TYPE, H) \ +#define DO_FMUL_IDX(NAME, ADD, MUL, TYPE, H) \ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ { \ intptr_t i, j, oprsz = simd_oprsz(desc); \ @@ -1395,33 +1520,37 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \ TYPE mm = m[H(i + idx)]; \ for (j = 0; j < segment; j++) { \ - d[i + j] = TYPE##_##ADD(d[i + j], \ - TYPE##_mul(n[i + j], mm, stat), stat); \ + d[i + j] = ADD(d[i + j], MUL(n[i + j], mm, stat), stat); \ } \ } \ clear_tail(d, oprsz, simd_maxsz(desc)); \ } -#define float16_nop(N, M, S) (M) -#define float32_nop(N, M, S) (M) -#define float64_nop(N, M, S) (M) +#define nop(N, M, S) (M) -DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16, H2) -DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32, H4) -DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64, H8) +DO_FMUL_IDX(gvec_fmul_idx_h, nop, float16_mul, float16, H2) +DO_FMUL_IDX(gvec_fmul_idx_s, nop, float32_mul, float32, H4) +DO_FMUL_IDX(gvec_fmul_idx_d, nop, float64_mul, float64, H8) + +#ifdef TARGET_AARCH64 + +DO_FMUL_IDX(gvec_fmulx_idx_h, nop, helper_advsimd_mulxh, float16, H2) +DO_FMUL_IDX(gvec_fmulx_idx_s, nop, helper_vfp_mulxs, float32, H4) +DO_FMUL_IDX(gvec_fmulx_idx_d, nop, helper_vfp_mulxd, float64, H8) + +#endif + +#undef nop /* * Non-fused multiply-accumulate operations, for Neon. NB that unlike * the fused ops below they assume accumulate both from and into Vd. 
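The _nf ("not fused") variants referenced in the comment above multiply and then accumulate as two separately rounded softfloat operations, while the fused vfma_* variants defined earlier in this hunk go through float*_muladd and round once. A side-by-side sketch using the same softfloat calls (helper names hypothetical):

    /* Sketch only: the two accumulate flavours for a single float32 lane. */
    static float32 ref_mla_nf(float32 d, float32 n, float32 m, float_status *st)
    {
        return float32_add(d, float32_mul(n, m, st), st);    /* two roundings */
    }
    static float32 ref_mla_fused(float32 d, float32 n, float32 m, float_status *st)
    {
        return float32_muladd(n, m, d, 0, st);               /* one rounding */
    }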
*/ -DO_FMUL_IDX(gvec_fmla_nf_idx_h, add, float16, H2) -DO_FMUL_IDX(gvec_fmla_nf_idx_s, add, float32, H4) -DO_FMUL_IDX(gvec_fmls_nf_idx_h, sub, float16, H2) -DO_FMUL_IDX(gvec_fmls_nf_idx_s, sub, float32, H4) - -#undef float16_nop -#undef float32_nop -#undef float64_nop +DO_FMUL_IDX(gvec_fmla_nf_idx_h, float16_add, float16_mul, float16, H2) +DO_FMUL_IDX(gvec_fmla_nf_idx_s, float32_add, float32_mul, float32, H4) +DO_FMUL_IDX(gvec_fmls_nf_idx_h, float16_sub, float16_mul, float16, H2) +DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4) + #undef DO_FMUL_IDX #define DO_FMLA_IDX(NAME, TYPE, H) \ @@ -1490,6 +1619,14 @@ DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX) DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX) DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX) +DO_SAT(gvec_usqadd_b, int, uint8_t, int8_t, +, 0, UINT8_MAX) +DO_SAT(gvec_usqadd_h, int, uint16_t, int16_t, +, 0, UINT16_MAX) +DO_SAT(gvec_usqadd_s, int64_t, uint32_t, int32_t, +, 0, UINT32_MAX) + +DO_SAT(gvec_suqadd_b, int, int8_t, uint8_t, +, INT8_MIN, INT8_MAX) +DO_SAT(gvec_suqadd_h, int, int16_t, uint16_t, +, INT16_MIN, INT16_MAX) +DO_SAT(gvec_suqadd_s, int64_t, int32_t, uint32_t, +, INT32_MIN, INT32_MAX) + #undef DO_SAT void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn, @@ -1580,6 +1717,62 @@ void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn, clear_tail(d, oprsz, simd_maxsz(desc)); } +void HELPER(gvec_usqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + uint64_t nn = n[i]; + int64_t mm = m[i]; + uint64_t dd = nn + mm; + + if (mm < 0) { + if (nn < (uint64_t)-mm) { + dd = 0; + q = true; + } + } else { + if (dd < nn) { + dd = UINT64_MAX; + q = true; + } + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} + +void HELPER(gvec_suqadd_d)(void *vd, void *vq, void *vn, + void *vm, uint32_t desc) +{ + intptr_t i, oprsz = simd_oprsz(desc); + uint64_t *d = vd, *n = vn, *m = vm; + bool q = false; + + for (i = 0; i < oprsz / 8; i++) { + int64_t nn = n[i]; + uint64_t mm = m[i]; + int64_t dd = nn + mm; + + if (mm > (uint64_t)(INT64_MAX - nn)) { + dd = INT64_MAX; + q = true; + } + d[i] = dd; + } + if (q) { + uint32_t *qc = vq; + qc[0] = 1; + } + clear_tail(d, oprsz, simd_maxsz(desc)); +} #define DO_SRA(NAME, TYPE) \ void HELPER(NAME)(void *vd, void *vn, uint32_t desc) \ @@ -2127,50 +2320,90 @@ DO_ABA(gvec_uaba_d, uint64_t) #undef DO_ABA -#define DO_NEON_PAIRWISE(NAME, OP) \ - void HELPER(NAME##s)(void *vd, void *vn, void *vm, \ - void *stat, uint32_t oprsz) \ - { \ - float_status *fpst = stat; \ - float32 *d = vd; \ - float32 *n = vn; \ - float32 *m = vm; \ - float32 r0, r1; \ - \ - /* Read all inputs before writing outputs in case vm == vd */ \ - r0 = float32_##OP(n[H4(0)], n[H4(1)], fpst); \ - r1 = float32_##OP(m[H4(0)], m[H4(1)], fpst); \ - \ - d[H4(0)] = r0; \ - d[H4(1)] = r1; \ - } \ - \ - void HELPER(NAME##h)(void *vd, void *vn, void *vm, \ - void *stat, uint32_t oprsz) \ - { \ - float_status *fpst = stat; \ - float16 *d = vd; \ - float16 *n = vn; \ - float16 *m = vm; \ - float16 r0, r1, r2, r3; \ - \ - /* Read all inputs before writing outputs in case vm == vd */ \ - r0 = float16_##OP(n[H2(0)], n[H2(1)], fpst); \ - r1 = float16_##OP(n[H2(2)], n[H2(3)], fpst); \ - r2 = float16_##OP(m[H2(0)], m[H2(1)], fpst); \ - r3 = float16_##OP(m[H2(2)], 
m[H2(3)], fpst); \ - \ - d[H2(0)] = r0; \ - d[H2(1)] = r1; \ - d[H2(2)] = r2; \ - d[H2(3)] = r3; \ - } - -DO_NEON_PAIRWISE(neon_padd, add) -DO_NEON_PAIRWISE(neon_pmax, max) -DO_NEON_PAIRWISE(neon_pmin, min) - -#undef DO_NEON_PAIRWISE +#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t half = oprsz / sizeof(TYPE) / 2; \ + TYPE *d = vd, *n = vn, *m = vm; \ + if (unlikely(d == m)) { \ + m = memcpy(&scratch, m, oprsz); \ + } \ + for (intptr_t i = 0; i < half; ++i) { \ + d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)], stat); \ + } \ + for (intptr_t i = 0; i < half; ++i) { \ + d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)], stat); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +DO_3OP_PAIR(gvec_faddp_h, float16_add, float16, H2) +DO_3OP_PAIR(gvec_faddp_s, float32_add, float32, H4) +DO_3OP_PAIR(gvec_faddp_d, float64_add, float64, ) + +DO_3OP_PAIR(gvec_fmaxp_h, float16_max, float16, H2) +DO_3OP_PAIR(gvec_fmaxp_s, float32_max, float32, H4) +DO_3OP_PAIR(gvec_fmaxp_d, float64_max, float64, ) + +DO_3OP_PAIR(gvec_fminp_h, float16_min, float16, H2) +DO_3OP_PAIR(gvec_fminp_s, float32_min, float32, H4) +DO_3OP_PAIR(gvec_fminp_d, float64_min, float64, ) + +DO_3OP_PAIR(gvec_fmaxnump_h, float16_maxnum, float16, H2) +DO_3OP_PAIR(gvec_fmaxnump_s, float32_maxnum, float32, H4) +DO_3OP_PAIR(gvec_fmaxnump_d, float64_maxnum, float64, ) + +DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2) +DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4) +DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, ) + +#undef DO_3OP_PAIR + +#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \ +void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \ +{ \ + ARMVectorReg scratch; \ + intptr_t oprsz = simd_oprsz(desc); \ + intptr_t half = oprsz / sizeof(TYPE) / 2; \ + TYPE *d = vd, *n = vn, *m = vm; \ + if (unlikely(d == m)) { \ + m = memcpy(&scratch, m, oprsz); \ + } \ + for (intptr_t i = 0; i < half; ++i) { \ + d[H(i)] = FUNC(n[H(i * 2)], n[H(i * 2 + 1)]); \ + } \ + for (intptr_t i = 0; i < half; ++i) { \ + d[H(i + half)] = FUNC(m[H(i * 2)], m[H(i * 2 + 1)]); \ + } \ + clear_tail(d, oprsz, simd_maxsz(desc)); \ +} + +#define ADD(A, B) (A + B) +DO_3OP_PAIR(gvec_addp_b, ADD, uint8_t, H1) +DO_3OP_PAIR(gvec_addp_h, ADD, uint16_t, H2) +DO_3OP_PAIR(gvec_addp_s, ADD, uint32_t, H4) +DO_3OP_PAIR(gvec_addp_d, ADD, uint64_t, ) +#undef ADD + +DO_3OP_PAIR(gvec_smaxp_b, MAX, int8_t, H1) +DO_3OP_PAIR(gvec_smaxp_h, MAX, int16_t, H2) +DO_3OP_PAIR(gvec_smaxp_s, MAX, int32_t, H4) + +DO_3OP_PAIR(gvec_umaxp_b, MAX, uint8_t, H1) +DO_3OP_PAIR(gvec_umaxp_h, MAX, uint16_t, H2) +DO_3OP_PAIR(gvec_umaxp_s, MAX, uint32_t, H4) + +DO_3OP_PAIR(gvec_sminp_b, MIN, int8_t, H1) +DO_3OP_PAIR(gvec_sminp_h, MIN, int16_t, H2) +DO_3OP_PAIR(gvec_sminp_s, MIN, int32_t, H4) + +DO_3OP_PAIR(gvec_uminp_b, MIN, uint8_t, H1) +DO_3OP_PAIR(gvec_uminp_h, MIN, uint16_t, H2) +DO_3OP_PAIR(gvec_uminp_s, MIN, uint32_t, H4) + +#undef DO_3OP_PAIR #define DO_VCVT_FIXED(NAME, FUNC, TYPE) \ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \ diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c index 3e5e37abbe..ce26b8a71a 100644 --- a/target/arm/vfp_helper.c +++ b/target/arm/vfp_helper.c @@ -281,36 +281,6 @@ VFP_BINOP(minnum) VFP_BINOP(maxnum) #undef VFP_BINOP -dh_ctype_f16 VFP_HELPER(neg, h)(dh_ctype_f16 a) -{ - return float16_chs(a); -} - -float32 VFP_HELPER(neg, s)(float32 a) -{ - return 
float32_chs(a); -} - -float64 VFP_HELPER(neg, d)(float64 a) -{ - return float64_chs(a); -} - -dh_ctype_f16 VFP_HELPER(abs, h)(dh_ctype_f16 a) -{ - return float16_abs(a); -} - -float32 VFP_HELPER(abs, s)(float32 a) -{ - return float32_abs(a); -} - -float64 VFP_HELPER(abs, d)(float64 a) -{ - return float64_abs(a); -} - dh_ctype_f16 VFP_HELPER(sqrt, h)(dh_ctype_f16 a, CPUARMState *env) { return float16_sqrt(a, &env->vfp.fp_status_f16); diff --git a/target/avr/cpu-param.h b/target/avr/cpu-param.h index 9a92bc74fc..93c2f470d0 100644 --- a/target/avr/cpu-param.h +++ b/target/avr/cpu-param.h @@ -32,4 +32,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 24 #define TARGET_VIRT_ADDR_SPACE_BITS 24 +#define TCG_GUEST_DEFAULT_MO 0 + #endif diff --git a/target/avr/cpu.c b/target/avr/cpu.c index 71ce62a4c2..f53e1192b1 100644 --- a/target/avr/cpu.c +++ b/target/avr/cpu.c @@ -55,7 +55,7 @@ static int avr_cpu_mmu_index(CPUState *cs, bool ifetch) static void avr_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu_env(cs)->pc_w = tb->pc / 2; /* internally PC points to words */ } diff --git a/target/avr/cpu.h b/target/avr/cpu.h index d185d20dcb..4725535102 100644 --- a/target/avr/cpu.h +++ b/target/avr/cpu.h @@ -30,8 +30,6 @@ #define CPU_RESOLVING_TYPE TYPE_AVR_CPU -#define TCG_GUEST_DEFAULT_MO 0 - /* * AVR has two memory spaces, data & code. * e.g. both have 0 address diff --git a/target/avr/gdbstub.c b/target/avr/gdbstub.c index 2eeee2bf4e..d6d3c1479b 100644 --- a/target/avr/gdbstub.c +++ b/target/avr/gdbstub.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "gdbstub/helpers.h" +#include "cpu.h" int avr_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { diff --git a/target/avr/helper.c b/target/avr/helper.c index eeca415c43..345708a1b3 100644 --- a/target/avr/helper.c +++ b/target/avr/helper.c @@ -24,6 +24,7 @@ #include "cpu.h" #include "hw/core/tcg-cpu-ops.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/cpu_ldst.h" #include "exec/address-spaces.h" #include "exec/helper-proto.h" diff --git a/target/avr/translate.c b/target/avr/translate.c index 87e2bd5ef1..2d51892115 100644 --- a/target/avr/translate.c +++ b/target/avr/translate.c @@ -24,7 +24,6 @@ #include "cpu.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" -#include "exec/cpu_ldst.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/log.h" @@ -173,7 +172,7 @@ static int to_regs_00_30_by_two(DisasContext *ctx, int indx) static uint16_t next_word(DisasContext *ctx) { - return cpu_lduw_code(ctx->env, ctx->npc++ * 2); + return translator_lduw(ctx->env, &ctx->base, ctx->npc++ * 2); } static int append_16(DisasContext *ctx, int x) @@ -2787,20 +2786,12 @@ static void avr_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void avr_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cs, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cs, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps avr_tr_ops = { .init_disas_context = avr_tr_init_disas_context, .tb_start = avr_tr_tb_start, .insn_start = avr_tr_insn_start, .translate_insn = avr_tr_translate_insn, .tb_stop = avr_tr_tb_stop, - .disas_log = avr_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/cris/mmu.c b/target/cris/mmu.c index b574ec6e5b..d51008c541 100644 --- 
a/target/cris/mmu.c +++ b/target/cris/mmu.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "mmu.h" #ifdef DEBUG @@ -333,7 +334,7 @@ int cris_mmu_translate(struct cris_mmu_result *res, if (!cris_mmu_enabled(env->sregs[SFR_RW_GC_CFG])) { res->phy = vaddr; - res->prot = PAGE_BITS; + res->prot = PAGE_RWX; goto done; } @@ -344,7 +345,7 @@ int cris_mmu_translate(struct cris_mmu_result *res, miss = 0; base = cris_mmu_translate_seg(env, seg); res->phy = base | (0x0fffffff & vaddr); - res->prot = PAGE_BITS; + res->prot = PAGE_RWX; } else { miss = cris_mmu_translate_page(res, env, vaddr, access_type, is_user, debug); diff --git a/target/cris/translate.c b/target/cris/translate.c index b3a4d61d0a..a30c67eb07 100644 --- a/target/cris/translate.c +++ b/target/cris/translate.c @@ -25,12 +25,10 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "mmu.h" -#include "exec/cpu_ldst.h" #include "exec/translator.h" #include "crisv32-decode.h" #include "qemu/qemu-print.h" @@ -223,37 +221,28 @@ static int sign_extend(unsigned int val, unsigned int width) } static int cris_fetch(CPUCRISState *env, DisasContext *dc, uint32_t addr, - unsigned int size, unsigned int sign) + unsigned int size, bool sign) { int r; switch (size) { case 4: - { - r = cpu_ldl_code(env, addr); + r = translator_ldl(env, &dc->base, addr); break; - } case 2: - { + r = translator_lduw(env, &dc->base, addr); if (sign) { - r = cpu_ldsw_code(env, addr); - } else { - r = cpu_lduw_code(env, addr); + r = (int16_t)r; } break; - } case 1: - { + r = translator_ldub(env, &dc->base, addr); if (sign) { - r = cpu_ldsb_code(env, addr); - } else { - r = cpu_ldub_code(env, addr); + r = (int8_t)r; } break; - } default: - cpu_abort(CPU(dc->cpu), "Invalid fetch size %d\n", size); - break; + g_assert_not_reached(); } return r; } @@ -2869,7 +2858,7 @@ static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc) int i; /* Load a halfword onto the instruction register. */ - dc->ir = cris_fetch(env, dc, dc->pc, 2, 0); + dc->ir = cris_fetch(env, dc, dc->pc, 2, 0); /* Now decode it. */ dc->opcode = EXTRACT_FIELD(dc->ir, 4, 11); @@ -3148,22 +3137,12 @@ static void cris_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void cris_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - if (!DISAS_CRIS) { - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); - } -} - static const TranslatorOps cris_tr_ops = { .init_disas_context = cris_tr_init_disas_context, .tb_start = cris_tr_tb_start, .insn_start = cris_tr_insn_start, .translate_insn = cris_tr_translate_insn, .tb_stop = cris_tr_tb_stop, - .disas_log = cris_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/cris/translate_v10.c.inc b/target/cris/translate_v10.c.inc index 73fc27c15d..c15ff47505 100644 --- a/target/cris/translate_v10.c.inc +++ b/target/cris/translate_v10.c.inc @@ -165,20 +165,7 @@ static int dec10_prep_move_m(CPUCRISState *env, DisasContext *dc, /* Load [$rs] onto T1. 
*/ if (is_imm) { - if (memsize != 4) { - if (s_ext) { - if (memsize == 1) - imm = cpu_ldsb_code(env, dc->pc + 2); - else - imm = cpu_ldsw_code(env, dc->pc + 2); - } else { - if (memsize == 1) - imm = cpu_ldub_code(env, dc->pc + 2); - else - imm = cpu_lduw_code(env, dc->pc + 2); - } - } else - imm = cpu_ldl_code(env, dc->pc + 2); + imm = cris_fetch(env, dc, dc->pc + 2, memsize, s_ext); tcg_gen_movi_tl(dst, imm); @@ -929,10 +916,11 @@ static int dec10_dip(CPUCRISState *env, DisasContext *dc) LOG_DIS("dip pc=%x opcode=%d r%d r%d\n", dc->pc, dc->opcode, dc->src, dc->dst); if (dc->src == 15) { - imm = cpu_ldl_code(env, dc->pc + 2); + imm = cris_fetch(env, dc, dc->pc + 2, 4, 0); tcg_gen_movi_tl(cpu_PR[PR_PREFIX], imm); - if (dc->postinc) + if (dc->postinc) { insn_len += 4; + } tcg_gen_addi_tl(cpu_R[15], cpu_R[15], insn_len - 2); } else { gen_load(dc, cpu_PR[PR_PREFIX], cpu_R[dc->src], 4, 0); @@ -1095,10 +1083,10 @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc) if (dc->src == 15) { LOG_DIS("jump.%d %d r%d r%d direct\n", size, dc->opcode, dc->src, dc->dst); - imm = cpu_ldl_code(env, dc->pc + 2); - if (dc->mode == CRISV10_MODE_AUTOINC) + imm = cris_fetch(env, dc, dc->pc + 2, size, 0); + if (dc->mode == CRISV10_MODE_AUTOINC) { insn_len += size; - + } c = tcg_constant_tl(dc->pc + insn_len); t_gen_mov_preg_TN(dc, dc->dst, c); dc->jmp_pc = imm; @@ -1164,7 +1152,7 @@ static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc) case CRISV10_IND_BCC_M: cris_cc_mask(dc, 0); - simm = cpu_ldsw_code(env, dc->pc + 2); + simm = cris_fetch(env, dc, dc->pc + 2, 2, 1); simm += 4; LOG_DIS("bcc_m: b%s %x\n", cc_name(dc->cond), dc->pc + simm); @@ -1185,7 +1173,7 @@ static unsigned int crisv10_decoder(CPUCRISState *env, DisasContext *dc) unsigned int insn_len = 2; /* Load a halfword onto the instruction register. */ - dc->ir = cpu_lduw_code(env, dc->pc); + dc->ir = cris_fetch(env, dc, dc->pc, 2, 0); /* Now decode it. */ dc->opcode = EXTRACT_FIELD(dc->ir, 6, 9); diff --git a/target/hexagon/README b/target/hexagon/README index 746ebec378..7ffd517d70 100644 --- a/target/hexagon/README +++ b/target/hexagon/README @@ -43,11 +43,9 @@ target/hexagon/gen_semantics.c. This step produces That file is consumed by the following python scripts to produce the indicated header files in <BUILD_DIR>/target/hexagon gen_opcodes_def.py -> opcodes_def_generated.h.inc - gen_op_regs.py -> op_regs_generated.h.inc gen_printinsn.py -> printinsn_generated.h.inc gen_op_attribs.py -> op_attribs_generated.h.inc gen_helper_protos.py -> helper_protos_generated.h.inc - gen_shortcode.py -> shortcode_generated.h.inc gen_tcg_funcs.py -> tcg_funcs_generated.c.inc gen_tcg_func_table.py -> tcg_func_table_generated.c.inc gen_helper_funcs.py -> helper_funcs_generated.c.inc @@ -183,10 +181,11 @@ when the override is present. } We also generate an analyze_<tag> function for each instruction. Currently, -these functions record the writes to registers by calling ctx_log_*. During -gen_start_packet, we invoke the analyze_<tag> function for each instruction in -the packet, and we mark the implicit writes. After the analysis is performed, -we initialize the result register for each of the predicated assignments. +these functions record the reads and writes to registers by calling ctx_log_*. +During gen_start_packet, we invoke the analyze_<tag> function for each instruction in +the packet, and we mark the implicit writes. The analysis determines if the packet +semantics can be short-circuited. 
If not, we initialize the result register for each +of the predicated assignments. In addition to instruction semantics, we use a generator to create the decode tree. This generation is a four step process. diff --git a/target/hexagon/attribs_def.h.inc b/target/hexagon/attribs_def.h.inc index 87942d46f4..9e3a05f882 100644 --- a/target/hexagon/attribs_def.h.inc +++ b/target/hexagon/attribs_def.h.inc @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -117,6 +117,7 @@ DEF_ATTRIB(IMPLICIT_READS_P1, "Reads the P1 register", "", "") DEF_ATTRIB(IMPLICIT_READS_P2, "Reads the P2 register", "", "") DEF_ATTRIB(IMPLICIT_READS_P3, "Reads the P3 register", "", "") DEF_ATTRIB(IMPLICIT_WRITES_USR, "May write USR", "", "") +DEF_ATTRIB(IMPLICIT_READS_SP, "Reads the SP register", "", "") DEF_ATTRIB(COMMUTES, "The operation is communitive", "", "") DEF_ATTRIB(DEALLOCRET, "dealloc_return", "", "") DEF_ATTRIB(DEALLOCFRAME, "deallocframe", "", "") diff --git a/target/hexagon/cpu.c b/target/hexagon/cpu.c index a56bb4b075..64cc05cca7 100644 --- a/target/hexagon/cpu.c +++ b/target/hexagon/cpu.c @@ -257,7 +257,7 @@ static vaddr hexagon_cpu_get_pc(CPUState *cs) static void hexagon_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu_env(cs)->gpr[HEX_REG_PC] = tb->pc; } diff --git a/target/hexagon/decode.c b/target/hexagon/decode.c index a40210ca1e..23deba2426 100644 --- a/target/hexagon/decode.c +++ b/target/hexagon/decode.c @@ -115,22 +115,13 @@ static void decode_fill_newvalue_regno(Packet *packet) { int i, use_regidx, offset, def_idx, dst_idx; - uint16_t def_opcode, use_opcode; - char *dststr; for (i = 1; i < packet->num_insns; i++) { if (GET_ATTRIB(packet->insn[i].opcode, A_DOTNEWVALUE) && !GET_ATTRIB(packet->insn[i].opcode, A_EXTENSION)) { - use_opcode = packet->insn[i].opcode; - - /* It's a store, so we're adjusting the Nt field */ - if (GET_ATTRIB(use_opcode, A_STORE)) { - use_regidx = strchr(opcode_reginfo[use_opcode], 't') - - opcode_reginfo[use_opcode]; - } else { /* It's a Jump, so we're adjusting the Ns field */ - use_regidx = strchr(opcode_reginfo[use_opcode], 's') - - opcode_reginfo[use_opcode]; - } + + g_assert(packet->insn[i].new_read_idx != -1); + use_regidx = packet->insn[i].new_read_idx; /* * What's encoded at the N-field is the offset to who's producing @@ -151,37 +142,9 @@ decode_fill_newvalue_regno(Packet *packet) */ g_assert(!((def_idx < 0) || (def_idx > (packet->num_insns - 1)))); - /* - * packet->insn[def_idx] is the producer - * Figure out which type of destination it produces - * and the corresponding index in the reginfo - */ - def_opcode = packet->insn[def_idx].opcode; - dststr = strstr(opcode_wregs[def_opcode], "Rd"); - if (dststr) { - dststr = strchr(opcode_reginfo[def_opcode], 'd'); - } else { - dststr = strstr(opcode_wregs[def_opcode], "Rx"); - if (dststr) { - dststr = strchr(opcode_reginfo[def_opcode], 'x'); - } else { - dststr = strstr(opcode_wregs[def_opcode], "Re"); - if (dststr) { - dststr = strchr(opcode_reginfo[def_opcode], 'e'); - } else { - dststr = strstr(opcode_wregs[def_opcode], "Ry"); - if (dststr) { - dststr = strchr(opcode_reginfo[def_opcode], 'y'); - } else { - 
g_assert_not_reached(); - } - } - } - } - g_assert(dststr != NULL); - /* Now patch up the consumer with the register number */ - dst_idx = dststr - opcode_reginfo[def_opcode]; + g_assert(packet->insn[def_idx].dest_idx != -1); + dst_idx = packet->insn[def_idx].dest_idx; packet->insn[i].regno[use_regidx] = packet->insn[def_idx].regno[dst_idx]; /* @@ -362,8 +325,7 @@ static void decode_shuffle_for_execution(Packet *packet) for (flag = false, i = 0; i < last_insn + 1; i++) { int opcode = packet->insn[i].opcode; - if ((strstr(opcode_wregs[opcode], "Pd4") || - strstr(opcode_wregs[opcode], "Pe4")) && + if (packet->insn[i].has_pred_dest && GET_ATTRIB(opcode, A_STORE) == 0) { /* This should be a compare (not a store conditional) */ if (flag) { diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py index a9af666cef..54bac19724 100755 --- a/target/hexagon/gen_analyze_funcs.py +++ b/target/hexagon/gen_analyze_funcs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -43,59 +43,53 @@ def gen_analyze_func(f, tag, regs, imms): f.write("{\n") f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n") - - i = 0 - ## Analyze all the registers - for regtype, regid in regs: - reg = hex_common.get_register(tag, regtype, regid) - if reg.is_written(): - reg.analyze_write(f, tag, i) + if (hex_common.is_hvx_insn(tag)): + if hex_common.has_hvx_helper(tag): + f.write( + " const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n" + ) + f.write(" ctx_start_hvx_insn(ctx);\n") else: - reg.analyze_read(f, i) - i += 1 - - has_generated_helper = not hex_common.skip_qemu_helper( - tag - ) and not hex_common.is_idef_parser_enabled(tag) - - ## Mark HVX instructions with generated helpers - if (has_generated_helper and - "A_CVI" in hex_common.attribdict[tag]): - f.write(" ctx->has_hvx_helper = true;\n") + f.write( + " const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n" + ) + + ## Declare all the registers + for regno, register in enumerate(regs): + reg_type, reg_id = register + reg = hex_common.get_register(tag, reg_type, reg_id) + reg.decl_reg_num(f, regno) + + ## Analyze the register reads + for regno, register in enumerate(regs): + reg_type, reg_id = register + reg = hex_common.get_register(tag, reg_type, reg_id) + if reg.is_read(): + reg.analyze_read(f, regno) + + ## Analyze the register writes + for regno, register in enumerate(regs): + reg_type, reg_id = register + reg = hex_common.get_register(tag, reg_type, reg_id) + if reg.is_written(): + reg.analyze_write(f, tag, regno) f.write("}\n\n") def main(): - hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) - hex_common.read_overrides_file(sys.argv[3]) - hex_common.read_overrides_file(sys.argv[4]) - ## Whether or not idef-parser is enabled is - ## determined by the number of arguments to - ## this script: - ## - ## 5 args. -> not enabled, - ## 6 args. -> idef-parser enabled. - ## - ## The 6:th arg. then holds a list of the successfully - ## parsed instructions. 
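(The argument-parsing boilerplate above, repeated in each generator script, is folded into a single hex_common.read_common_files() helper later in this patch.) As a rough sketch of the new calling convention only -- simplified, not the real helper -- the generators now lay out their positional arguments like this:

# Editor's sketch of the consolidated convention used by read_common_files():
#   argv[1]      semantics file
#   argv[2..3]   override files (gen_tcg.h, gen_tcg_hvx.h)
#   argv[4]      idef-parser allow-list, present only when idef-parser is enabled
#   argv[-1]     output file
import sys

def idef_parser_enabled(argv=None):
    argv = sys.argv if argv is None else argv
    # The script name plus the four fixed arguments gives len(argv) == 5;
    # a sixth entry means an idef-parser allow-list was also passed.
    return len(argv) > 5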
- is_idef_parser_enabled = len(sys.argv) > 6 - if is_idef_parser_enabled: - hex_common.read_idef_parser_enabled_file(sys.argv[5]) - hex_common.calculate_attribs() - hex_common.init_registers() + hex_common.read_common_files() tagregs = hex_common.get_tagregs() tagimms = hex_common.get_tagimms() with open(sys.argv[-1], "w") as f: - f.write("#ifndef HEXAGON_TCG_FUNCS_H\n") - f.write("#define HEXAGON_TCG_FUNCS_H\n\n") + f.write("#ifndef HEXAGON_ANALYZE_FUNCS_C_INC\n") + f.write("#define HEXAGON_ANALYZE_FUNCS_C_INC\n\n") for tag in hex_common.tags: gen_analyze_func(f, tag, tagregs[tag], tagimms[tag]) - f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n") + f.write("#endif /* HEXAGON_ANALYZE_FUNCS_C_INC */\n") if __name__ == "__main__": diff --git a/target/hexagon/gen_helper_funcs.py b/target/hexagon/gen_helper_funcs.py index 9cc3d69c49..e9685bff2f 100755 --- a/target/hexagon/gen_helper_funcs.py +++ b/target/hexagon/gen_helper_funcs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -102,24 +102,7 @@ def gen_helper_function(f, tag, tagregs, tagimms): def main(): - hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) - hex_common.read_overrides_file(sys.argv[3]) - hex_common.read_overrides_file(sys.argv[4]) - ## Whether or not idef-parser is enabled is - ## determined by the number of arguments to - ## this script: - ## - ## 5 args. -> not enabled, - ## 6 args. -> idef-parser enabled. - ## - ## The 6:th arg. then holds a list of the successfully - ## parsed instructions. - is_idef_parser_enabled = len(sys.argv) > 6 - if is_idef_parser_enabled: - hex_common.read_idef_parser_enabled_file(sys.argv[5]) - hex_common.calculate_attribs() - hex_common.init_registers() + hex_common.read_common_files() tagregs = hex_common.get_tagregs() tagimms = hex_common.get_tagimms() diff --git a/target/hexagon/gen_helper_protos.py b/target/hexagon/gen_helper_protos.py index c82b0f54e4..fd2bfd0f36 100755 --- a/target/hexagon/gen_helper_protos.py +++ b/target/hexagon/gen_helper_protos.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -40,28 +40,19 @@ def gen_helper_prototype(f, tag, tagregs, tagimms): declared.append(arg.proto_arg) arguments = ", ".join(declared) - f.write(f"DEF_HELPER_{len(declared) - 1}({tag}, {arguments})\n") + + ## Add the TCG_CALL_NO_RWG_SE flag to helpers that don't take the env + ## argument and aren't HVX instructions. Since HVX instructions take + ## pointers to their arguments, they will have side effects. 
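The actual emission follows in the hunk below; as a standalone sketch of the same decision -- with a hypothetical attribs set standing in for hex_common.attribdict[tag], not the real API -- the choice between the plain and the flagged DEF_HELPER form looks roughly like this:

# Editor's illustration, not part of the patch.
def helper_proto_line(tag, proto_args, attribs):
    # proto_args is the return type followed by the argument types.
    needs_env = attribs & {"A_STORE", "A_LOAD", "A_CVI_GATHER",
                           "A_CVI_SCATTER", "A_IMPLICIT_WRITES_USR"}
    is_hvx = "A_CVI" in attribs
    arguments = ", ".join(proto_args)
    if needs_env or is_hvx:
        # Takes env (or passes vectors by reference): may have side effects.
        return f"DEF_HELPER_{len(proto_args) - 1}({tag}, {arguments})"
    # Pure computation: tell TCG it neither reads nor writes globals.
    return (f"DEF_HELPER_FLAGS_{len(proto_args) - 1}({tag}, "
            f"TCG_CALL_NO_RWG_SE, {arguments})")

# A two-operand ALU helper without env might come out as, e.g.:
#   DEF_HELPER_FLAGS_2(<tag>, TCG_CALL_NO_RWG_SE, s32, s32, s32)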
+ if hex_common.need_env(tag) or hex_common.is_hvx_insn(tag): + f.write(f"DEF_HELPER_{len(declared) - 1}({tag}, {arguments})\n") + else: + f.write(f"DEF_HELPER_FLAGS_{len(declared) - 1}({tag}, " + f"TCG_CALL_NO_RWG_SE, {arguments})\n") def main(): - hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) - hex_common.read_overrides_file(sys.argv[3]) - hex_common.read_overrides_file(sys.argv[4]) - ## Whether or not idef-parser is enabled is - ## determined by the number of arguments to - ## this script: - ## - ## 5 args. -> not enabled, - ## 6 args. -> idef-parser enabled. - ## - ## The 6:th arg. then holds a list of the successfully - ## parsed instructions. - is_idef_parser_enabled = len(sys.argv) > 6 - if is_idef_parser_enabled: - hex_common.read_idef_parser_enabled_file(sys.argv[5]) - hex_common.calculate_attribs() - hex_common.init_registers() + hex_common.read_common_files() tagregs = hex_common.get_tagregs() tagimms = hex_common.get_tagimms() diff --git a/target/hexagon/gen_idef_parser_funcs.py b/target/hexagon/gen_idef_parser_funcs.py index 550a48cb7b..eb494abba8 100644 --- a/target/hexagon/gen_idef_parser_funcs.py +++ b/target/hexagon/gen_idef_parser_funcs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 rev.ng Labs Srl. All Rights Reserved. +## Copyright(c) 2019-2024 rev.ng Labs Srl. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -44,13 +44,12 @@ import hex_common ## def main(): hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) hex_common.calculate_attribs() hex_common.init_registers() tagregs = hex_common.get_tagregs() tagimms = hex_common.get_tagimms() - with open(sys.argv[3], "w") as f: + with open(sys.argv[-1], "w") as f: f.write('#include "macros.inc"\n\n') for tag in hex_common.tags: diff --git a/target/hexagon/gen_op_attribs.py b/target/hexagon/gen_op_attribs.py index 41074b8573..99448220da 100755 --- a/target/hexagon/gen_op_attribs.py +++ b/target/hexagon/gen_op_attribs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -25,13 +25,12 @@ import hex_common def main(): hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) hex_common.calculate_attribs() ## ## Generate all the attributes associated with each instruction ## - with open(sys.argv[3], "w") as f: + with open(sys.argv[-1], "w") as f: for tag in hex_common.tags: f.write( f"OP_ATTRIB({tag},ATTRIBS(" diff --git a/target/hexagon/gen_op_regs.py b/target/hexagon/gen_op_regs.py deleted file mode 100755 index 7b7b33895a..0000000000 --- a/target/hexagon/gen_op_regs.py +++ /dev/null @@ -1,125 +0,0 @@ -#!/usr/bin/env python3 - -## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2 of the License, or -## (at your option) any later version. 
-## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, see <http://www.gnu.org/licenses/>. -## - -import sys -import re -import string -import hex_common - - -## -## Generate the register and immediate operands for each instruction -## -def calculate_regid_reg(tag): - def letter_inc(x): - return chr(ord(x) + 1) - - ordered_implregs = ["SP", "FP", "LR"] - srcdst_lett = "X" - src_lett = "S" - dst_lett = "D" - retstr = "" - mapdict = {} - for reg in ordered_implregs: - reg_rd = 0 - reg_wr = 0 - if ("A_IMPLICIT_WRITES_" + reg) in hex_common.attribdict[tag]: - reg_wr = 1 - if reg_rd and reg_wr: - retstr += srcdst_lett - mapdict[srcdst_lett] = reg - srcdst_lett = letter_inc(srcdst_lett) - elif reg_rd: - retstr += src_lett - mapdict[src_lett] = reg - src_lett = letter_inc(src_lett) - elif reg_wr: - retstr += dst_lett - mapdict[dst_lett] = reg - dst_lett = letter_inc(dst_lett) - return retstr, mapdict - - -def calculate_regid_letters(tag): - retstr, mapdict = calculate_regid_reg(tag) - return retstr - - -def strip_reg_prefix(x): - y = x.replace("UREG.", "") - y = y.replace("MREG.", "") - return y.replace("GREG.", "") - - -def main(): - hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) - hex_common.init_registers() - tagregs = hex_common.get_tagregs(full=True) - tagimms = hex_common.get_tagimms() - - with open(sys.argv[3], "w") as f: - for tag in hex_common.tags: - regs = tagregs[tag] - rregs = [] - wregs = [] - regids = "" - for regtype, regid, _, numregs in regs: - reg = hex_common.get_register(tag, regtype, regid) - if reg.is_read(): - if regid[0] not in regids: - regids += regid[0] - rregs.append(regtype + regid + numregs) - if reg.is_written(): - wregs.append(regtype + regid + numregs) - if regid[0] not in regids: - regids += regid[0] - for attrib in hex_common.attribdict[tag]: - if hex_common.attribinfo[attrib]["rreg"]: - rregs.append(strip_reg_prefix(attribinfo[attrib]["rreg"])) - if hex_common.attribinfo[attrib]["wreg"]: - wregs.append(strip_reg_prefix(attribinfo[attrib]["wreg"])) - regids += calculate_regid_letters(tag) - f.write( - f'REGINFO({tag},"{regids}",\t/*RD:*/\t"{",".join(rregs)}",' - f'\t/*WR:*/\t"{",".join(wregs)}")\n' - ) - - for tag in hex_common.tags: - imms = tagimms[tag] - f.write(f"IMMINFO({tag}") - if not imms: - f.write(""",'u',0,0,'U',0,0""") - for sign, size, shamt in imms: - if sign == "r": - sign = "s" - if not shamt: - shamt = "0" - f.write(f""",'{sign}',{size},{shamt}""") - if len(imms) == 1: - if sign.isupper(): - myu = "u" - else: - myu = "U" - f.write(f""",'{myu}',0,0""") - f.write(")\n") - - -if __name__ == "__main__": - main() diff --git a/target/hexagon/gen_opcodes_def.py b/target/hexagon/gen_opcodes_def.py index cddd868fe3..536f0eb68a 100755 --- a/target/hexagon/gen_opcodes_def.py +++ b/target/hexagon/gen_opcodes_def.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 
## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -29,7 +29,7 @@ def main(): ## ## Generate a list of all the opcodes ## - with open(sys.argv[3], "w") as f: + with open(sys.argv[-1], "w") as f: for tag in hex_common.tags: f.write(f"OPCODE({tag}),\n") diff --git a/target/hexagon/gen_printinsn.py b/target/hexagon/gen_printinsn.py index e570bd7c6a..8bf4d0985c 100755 --- a/target/hexagon/gen_printinsn.py +++ b/target/hexagon/gen_printinsn.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -97,11 +97,10 @@ def spacify(s): def main(): hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) immext_casere = re.compile(r"IMMEXT\(([A-Za-z])") - with open(sys.argv[3], "w") as f: + with open(sys.argv[-1], "w") as f: for tag in hex_common.tags: if not hex_common.behdict[tag]: continue diff --git a/target/hexagon/gen_shortcode.py b/target/hexagon/gen_shortcode.py deleted file mode 100755 index deb94446c4..0000000000 --- a/target/hexagon/gen_shortcode.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 - -## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. -## -## This program is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2 of the License, or -## (at your option) any later version. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, see <http://www.gnu.org/licenses/>. -## - -import sys -import re -import string -import hex_common - - -def gen_shortcode(f, tag): - f.write(f"DEF_SHORTCODE({tag}, {hex_common.semdict[tag]})\n") - - -def main(): - hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) - hex_common.calculate_attribs() - tagregs = hex_common.get_tagregs() - tagimms = hex_common.get_tagimms() - - with open(sys.argv[3], "w") as f: - f.write("#ifndef DEF_SHORTCODE\n") - f.write("#define DEF_SHORTCODE(TAG,SHORTCODE) /* Nothing */\n") - f.write("#endif\n") - - for tag in hex_common.tags: - ## Skip the priv instructions - if "A_PRIV" in hex_common.attribdict[tag]: - continue - ## Skip the guest instructions - if "A_GUEST" in hex_common.attribdict[tag]: - continue - ## Skip the diag instructions - if tag == "Y6_diag": - continue - if tag == "Y6_diag0": - continue - if tag == "Y6_diag1": - continue - - gen_shortcode(f, tag) - - f.write("#undef DEF_SHORTCODE\n") - - -if __name__ == "__main__": - main() diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 1c4391b415..3fc1f4e281 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -1369,3 +1369,6 @@ gen_helper_raise_exception(tcg_env, excp); \ } while (0) #endif + +#define fGEN_TCG_A2_nop(SHORTCODE) do { } while (0) +#define fGEN_TCG_SA1_setin1(SHORTCODE) tcg_gen_movi_tl(RdV, -1) diff --git a/target/hexagon/gen_tcg_func_table.py b/target/hexagon/gen_tcg_func_table.py index f998ef0992..978ac1819b 100755 --- a/target/hexagon/gen_tcg_func_table.py +++ b/target/hexagon/gen_tcg_func_table.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -25,12 +25,11 @@ import hex_common def main(): hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) hex_common.calculate_attribs() tagregs = hex_common.get_tagregs() tagimms = hex_common.get_tagimms() - with open(sys.argv[3], "w") as f: + with open(sys.argv[-1], "w") as f: f.write("#ifndef HEXAGON_FUNC_TABLE_H\n") f.write("#define HEXAGON_FUNC_TABLE_H\n\n") diff --git a/target/hexagon/gen_tcg_funcs.py b/target/hexagon/gen_tcg_funcs.py index 3d8e3cb6a2..05aa0a7855 100755 --- a/target/hexagon/gen_tcg_funcs.py +++ b/target/hexagon/gen_tcg_funcs.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -108,24 +108,7 @@ def gen_def_tcg_func(f, tag, tagregs, tagimms): def main(): - hex_common.read_semantics_file(sys.argv[1]) - hex_common.read_attribs_file(sys.argv[2]) - hex_common.read_overrides_file(sys.argv[3]) - hex_common.read_overrides_file(sys.argv[4]) - hex_common.calculate_attribs() - hex_common.init_registers() - ## Whether or not idef-parser is enabled is - ## determined by the number of arguments to - ## this script: - ## - ## 5 args. -> not enabled, - ## 6 args. -> idef-parser enabled. - ## - ## The 6:th arg. then holds a list of the successfully - ## parsed instructions. 
- is_idef_parser_enabled = len(sys.argv) > 6 - if is_idef_parser_enabled: - hex_common.read_idef_parser_enabled_file(sys.argv[5]) + is_idef_parser_enabled = hex_common.read_common_files() tagregs = hex_common.get_tagregs() tagimms = hex_common.get_tagimms() diff --git a/target/hexagon/gen_trans_funcs.py b/target/hexagon/gen_trans_funcs.py index 53e844a44b..9f86b4edbd 100755 --- a/target/hexagon/gen_trans_funcs.py +++ b/target/hexagon/gen_trans_funcs.py @@ -68,6 +68,9 @@ def mark_which_imm_extended(f, tag): ## insn->regno[0] = args->Rd; ## insn->regno[1] = args->Rs; ## insn->regno[2] = args->Rt; +## insn->new_read_idx = -1; +## insn->dest_idx = 0; +## insn->has_pred_dest = false; ## return true; ## } ## @@ -84,14 +87,21 @@ def gen_trans_funcs(f): insn->opcode = {tag}; """)) - regno = 0 - for reg in regs: - reg_type = reg[0] - reg_id = reg[1] + new_read_idx = -1 + dest_idx = -1 + has_pred_dest = "false" + for regno, (reg_type, reg_id, *_) in enumerate(regs): + reg = hex_common.get_register(tag, reg_type, reg_id) f.write(code_fmt(f"""\ insn->regno[{regno}] = args->{reg_type}{reg_id}; """)) - regno += 1 + if reg.is_read() and reg.is_new(): + new_read_idx = regno + # dest_idx should be the first destination, so check for -1 + if reg.is_written() and dest_idx == -1: + dest_idx = regno + if reg_type == "P" and reg.is_written() and not reg.is_read(): + has_pred_dest = "true" if len(imms) != 0: mark_which_imm_extended(f, tag) @@ -112,6 +122,11 @@ def gen_trans_funcs(f): insn->immed[{immno}] = args->{imm_type}{imm_letter}; """)) + f.write(code_fmt(f"""\ + insn->new_read_idx = {new_read_idx}; + insn->dest_idx = {dest_idx}; + insn->has_pred_dest = {has_pred_dest}; + """)) f.write(textwrap.dedent(f"""\ return true; {close_curly} @@ -120,5 +135,6 @@ def gen_trans_funcs(f): if __name__ == "__main__": hex_common.read_semantics_file(sys.argv[1]) + hex_common.init_registers() with open(sys.argv[2], "w") as f: gen_trans_funcs(f) diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py index 195620c7ec..15ed4980e4 100755 --- a/target/hexagon/hex_common.py +++ b/target/hexagon/hex_common.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 ## -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. ## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -26,7 +26,6 @@ behdict = {} # tag ->behavior semdict = {} # tag -> semantics attribdict = {} # tag -> attributes macros = {} # macro -> macro information... 
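Relating the gen_trans_funcs.py hunk above to the three new Instruction fields: a minimal standalone sketch of how new_read_idx, dest_idx and has_pred_dest fall out of the operand list. Operands are modeled here as plain tuples for illustration; the real script queries the hex_common register classes (is_read(), is_written(), is_new()).

# Editor's sketch, not part of the patch.
def operand_indexes(regs):
    # regs: list of (reg_type, reg_id, is_read, is_written, is_new) tuples
    new_read_idx, dest_idx, has_pred_dest = -1, -1, False
    for regno, (reg_type, _reg_id, is_read, is_written, is_new) in enumerate(regs):
        if is_read and is_new:
            new_read_idx = regno            # the ".new" source operand
        if is_written and dest_idx == -1:
            dest_idx = regno                # first destination wins
        if reg_type == "P" and is_written and not is_read:
            has_pred_dest = True            # writes a predicate it does not read
    return new_read_idx, dest_idx, has_pred_dest

# E.g. a new-value store with operands (Rs32, Nt8) in that order would give
# new_read_idx == 1, dest_idx == -1, has_pred_dest == False, which is what
# decode_fill_newvalue_regno() and check_new_value() now rely on instead of
# scanning the old opcode_reginfo/opcode_wregs strings.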
-attribinfo = {} # Register information and misc registers = {} # register -> register functions new_registers = {} tags = [] # list of all tags @@ -101,6 +100,7 @@ def calculate_attribs(): add_qemu_macro_attrib('fLSBNEW1', 'A_IMPLICIT_READS_P1') add_qemu_macro_attrib('fLSBNEW1NOT', 'A_IMPLICIT_READS_P1') add_qemu_macro_attrib('fREAD_P3', 'A_IMPLICIT_READS_P3') + add_qemu_macro_attrib('fREAD_SP', 'A_IMPLICIT_READS_SP') # Recurse down macros, find attributes from sub-macros macroValues = list(macros.values()) @@ -197,6 +197,26 @@ def get_tagimms(): return dict(zip(tags, list(map(compute_tag_immediates, tags)))) +def need_p0(tag): + return "A_IMPLICIT_READS_P0" in attribdict[tag] + + +def need_sp(tag): + return "A_IMPLICIT_READS_SP" in attribdict[tag] + + +def is_hvx_insn(tag): + return "A_CVI" in attribdict[tag] + + +def need_env(tag): + return ("A_STORE" in attribdict[tag] or + "A_LOAD" in attribdict[tag] or + "A_CVI_GATHER" in attribdict[tag] or + "A_CVI_SCATTER" in attribdict[tag] or + "A_IMPLICIT_WRITES_USR" in attribdict[tag]) + + def need_slot(tag): if ( "A_CVI_SCATTER" not in attribdict[tag] @@ -241,6 +261,16 @@ def is_idef_parser_enabled(tag): return tag in idef_parser_enabled +def is_hvx_insn(tag): + return "A_CVI" in attribdict[tag] + + +def has_hvx_helper(tag): + return (is_hvx_insn(tag) and + not skip_qemu_helper(tag) and + not is_idef_parser_enabled(tag)) + + def imm_name(immlett): return f"{immlett}iV" @@ -257,19 +287,6 @@ def read_semantics_file(name): eval_line = "" -def read_attribs_file(name): - attribre = re.compile( - r"DEF_ATTRIB\(([A-Za-z0-9_]+), ([^,]*), " - + r'"([A-Za-z0-9_\.]*)", "([A-Za-z0-9_\.]*)"\)' - ) - for line in open(name, "rt").readlines(): - if not attribre.match(line): - continue - (attrib_base, descr, rreg, wreg) = attribre.findall(line)[0] - attrib_base = "A_" + attrib_base - attribinfo[attrib_base] = {"rreg": rreg, "wreg": wreg, "descr": descr} - - def read_overrides_file(name): overridere = re.compile(r"#define fGEN_TCG_([A-Za-z0-9_]+)\(.*") for line in open(name, "rt").readlines(): @@ -397,10 +414,18 @@ class Source: class OldSource(Source): def reg_tcg(self): return f"{self.regtype}{self.regid}V" + def is_old(self): + return True + def is_new(self): + return False class NewSource(Source): def reg_tcg(self): return f"{self.regtype}{self.regid}N" + def is_old(self): + return False + def is_new(self): + return True class ReadWrite: def reg_tcg(self): @@ -413,6 +438,10 @@ class ReadWrite: return True def is_readwrite(self): return True + def is_old(self): + return True + def is_new(self): + return False class GprDest(Register, Single, Dest): def decl_tcg(self, f, tag, regno): @@ -425,7 +454,6 @@ class GprDest(Register, Single, Dest): gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ ctx_log_reg_write(ctx, {self.reg_num}, {predicated}); @@ -438,7 +466,6 @@ class GprSource(Register, Single, OldSource): TCGv {self.reg_tcg()} = hex_gpr[{self.reg_num}]; """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_reg_read(ctx, {self.reg_num}); """)) @@ -449,9 +476,8 @@ class GprNewSource(Register, Single, NewSource): TCGv {self.reg_tcg()} = get_result_gpr(ctx, insn->regno[{regno}]); """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_reg_read(ctx, {self.reg_num}); + ctx_log_reg_read_new(ctx, {self.reg_num}); 
""")) class GprReadWrite(Register, Single, ReadWrite): @@ -471,8 +497,11 @@ class GprReadWrite(Register, Single, ReadWrite): f.write(code_fmt(f"""\ gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()}); """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_reg_read(ctx, {self.reg_num}); + """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ ctx_log_reg_write(ctx, {self.reg_num}, {predicated}); @@ -493,7 +522,6 @@ class ControlDest(Register, Single, Dest): gen_write_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ ctx_log_reg_write(ctx, {self.reg_num}, {predicated}); @@ -511,7 +539,6 @@ class ControlSource(Register, Single, OldSource): gen_read_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_reg_read(ctx, {self.reg_num}); """)) @@ -532,7 +559,6 @@ class ModifierSource(Register, Single, OldSource): declared.append(self.reg_tcg()) declared.append("CS") def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_reg_read(ctx, {self.reg_num}); """)) @@ -548,7 +574,6 @@ class PredDest(Register, Single, Dest): gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_pred_write(ctx, {self.reg_num}); """)) @@ -560,7 +585,6 @@ class PredSource(Register, Single, OldSource): TCGv {self.reg_tcg()} = hex_pred[{self.reg_num}]; """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_pred_read(ctx, {self.reg_num}); """)) @@ -571,9 +595,8 @@ class PredNewSource(Register, Single, NewSource): TCGv {self.reg_tcg()} = get_result_pred(ctx, insn->regno[{regno}]); """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_pred_read(ctx, {self.reg_num}); + ctx_log_pred_read_new(ctx, {self.reg_num}); """)) class PredReadWrite(Register, Single, ReadWrite): @@ -587,8 +610,11 @@ class PredReadWrite(Register, Single, ReadWrite): f.write(code_fmt(f"""\ gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()}); """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_pred_read(ctx, {self.reg_num}); + """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_pred_write(ctx, {self.reg_num}); """)) @@ -605,7 +631,6 @@ class PairDest(Register, Pair, Dest): gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated}); @@ -621,7 +646,6 @@ class PairSource(Register, Pair, OldSource): hex_gpr[{self.reg_num} + 1]); """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_reg_read_pair(ctx, {self.reg_num}); """)) @@ -640,8 +664,11 @@ class PairReadWrite(Register, Pair, ReadWrite): f.write(code_fmt(f"""\ gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()}); """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_reg_read_pair(ctx, {self.reg_num}); + """)) def analyze_write(self, 
f, tag, regno): - self.decl_reg_num(f, regno) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated}); @@ -663,7 +690,6 @@ class ControlPairDest(Register, Pair, Dest): gen_write_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated}); @@ -681,7 +707,6 @@ class ControlPairSource(Register, Pair, OldSource): gen_read_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()}); """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ ctx_log_reg_read_pair(ctx, {self.reg_num}); """)) @@ -705,11 +730,11 @@ class VRegDest(Register, Hvx, Dest): /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */ """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) newv = hvx_newv(tag) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ - ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated}); + ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated}, + insn_has_hvx_helper); """)) class VRegSource(Register, Hvx, OldSource): @@ -728,9 +753,8 @@ class VRegSource(Register, Hvx, OldSource): /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */ """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_vreg_read(ctx, {self.reg_num}); + ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper); """)) class VRegNewSource(Register, Hvx, NewSource): @@ -746,9 +770,8 @@ class VRegNewSource(Register, Hvx, NewSource): /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */ """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_vreg_read(ctx, {self.reg_num}); + ctx_log_vreg_read_new(ctx, {self.reg_num}, insn_has_hvx_helper); """)) class VRegReadWrite(Register, Hvx, ReadWrite): @@ -772,12 +795,16 @@ class VRegReadWrite(Register, Hvx, ReadWrite): f.write(code_fmt(f"""\ /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */ """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper); + """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) newv = hvx_newv(tag) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ - ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated}); + ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated}, + insn_has_hvx_helper); """)) class VRegTmp(Register, Hvx, ReadWrite): @@ -803,12 +830,16 @@ class VRegTmp(Register, Hvx, ReadWrite): f.write(code_fmt(f"""\ /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */ """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper); + """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) newv = hvx_newv(tag) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ - ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated}); + ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated}, + insn_has_hvx_helper); """)) class VRegPairDest(Register, Hvx, Dest): @@ -830,11 +861,11 @@ class VRegPairDest(Register, Hvx, Dest): /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */ """)) def 
analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) newv = hvx_newv(tag) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ - ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated}); + ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated}, + insn_has_hvx_helper); """)) class VRegPairSource(Register, Hvx, OldSource): @@ -860,9 +891,8 @@ class VRegPairSource(Register, Hvx, OldSource): /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */ """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_vreg_read_pair(ctx, {self.reg_num}); + ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper); """)) class VRegPairReadWrite(Register, Hvx, ReadWrite): @@ -892,12 +922,16 @@ class VRegPairReadWrite(Register, Hvx, ReadWrite): f.write(code_fmt(f"""\ /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */ """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper); + """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) newv = hvx_newv(tag) predicated = "true" if is_predicated(tag) else "false" f.write(code_fmt(f"""\ - ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated}); + ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated}, + insn_has_hvx_helper); """)) class QRegDest(Register, Hvx, Dest): @@ -919,9 +953,8 @@ class QRegDest(Register, Hvx, Dest): /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */ """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_qreg_write(ctx, {self.reg_num}); + ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper); """)) class QRegSource(Register, Hvx, OldSource): @@ -941,9 +974,8 @@ class QRegSource(Register, Hvx, OldSource): /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */ """)) def analyze_read(self, f, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_qreg_read(ctx, {self.reg_num}); + ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper); """)) class QRegReadWrite(Register, Hvx, ReadWrite): @@ -967,10 +999,13 @@ class QRegReadWrite(Register, Hvx, ReadWrite): f.write(code_fmt(f"""\ /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */ """)) + def analyze_read(self, f, regno): + f.write(code_fmt(f"""\ + ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper); + """)) def analyze_write(self, f, tag, regno): - self.decl_reg_num(f, regno) f.write(code_fmt(f"""\ - ctx_log_qreg_write(ctx, {self.reg_num}); + ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper); """)) def init_registers(): @@ -1060,11 +1095,12 @@ def helper_args(tag, regs, imms): args = [] ## First argument is the CPU state - args.append(HelperArg( - "env", - "tcg_env", - "CPUHexagonState *env" - )) + if need_env(tag): + args.append(HelperArg( + "env", + "tcg_env", + "CPUHexagonState *env" + )) ## For predicated instructions, we pass in the destination register if is_predicated(tag): @@ -1118,6 +1154,18 @@ def helper_args(tag, regs, imms): "tcg_constant_tl(ctx->next_PC)", "target_ulong next_PC" )) + if need_p0(tag): + args.append(HelperArg( + "i32", + "hex_pred[0]", + "uint32_t P0" + )) + if need_sp(tag): + args.append(HelperArg( + "i32", + "hex_gpr[HEX_REG_SP]", + "uint32_t SP" + )) if need_slot(tag): args.append(HelperArg( "i32", @@ -1131,3 +1179,24 @@ def helper_args(tag, regs, imms): "uint32_t part1" )) return 
args + + +def read_common_files(): + read_semantics_file(sys.argv[1]) + read_overrides_file(sys.argv[2]) + read_overrides_file(sys.argv[3]) + ## Whether or not idef-parser is enabled is + ## determined by the number of arguments to + ## this script: + ## + ## 4 args. -> not enabled, + ## 5 args. -> idef-parser enabled. + ## + ## The 5:th arg. then holds a list of the successfully + ## parsed instructions. + is_idef_parser_enabled = len(sys.argv) > 5 + if is_idef_parser_enabled: + read_idef_parser_enabled_file(sys.argv[4]) + calculate_attribs() + init_registers() + return is_idef_parser_enabled diff --git a/target/hexagon/insn.h b/target/hexagon/insn.h index 3e7a22c91e..24dcf7fe9f 100644 --- a/target/hexagon/insn.h +++ b/target/hexagon/insn.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -39,6 +39,9 @@ struct Instruction { uint32_t slot:3; uint32_t which_extended:1; /* If has an extender, which immediate */ uint32_t new_value_producer_slot:4; + int32_t new_read_idx; + int32_t dest_idx; + bool has_pred_dest; bool part1; /* * cmp-jumps are split into two insns. diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 1376d6ccc1..feb798c6c0 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -343,7 +343,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fREAD_LR() (env->gpr[HEX_REG_LR]) -#define fREAD_SP() (env->gpr[HEX_REG_SP]) +#define fREAD_SP() (SP) #define fREAD_LC0 (env->gpr[HEX_REG_LC0]) #define fREAD_LC1 (env->gpr[HEX_REG_LC1]) #define fREAD_SA0 (env->gpr[HEX_REG_SA0]) @@ -358,7 +358,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #endif #define fREAD_PC() (PC) -#define fREAD_P0() (env->pred[0]) +#define fREAD_P0() (P0) #define fCHECK_PCALIGN(A) diff --git a/target/hexagon/meson.build b/target/hexagon/meson.build index fb480afc03..b0b253aa6b 100644 --- a/target/hexagon/meson.build +++ b/target/hexagon/meson.build @@ -1,5 +1,5 @@ ## -## Copyright(c) 2020-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. +## Copyright(c) 2020-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 
## ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -18,7 +18,6 @@ hexagon_ss = ss.source_set() hex_common_py = 'hex_common.py' -attribs_def = meson.current_source_dir() / 'attribs_def.h.inc' gen_tcg_h = meson.current_source_dir() / 'gen_tcg.h' gen_tcg_hvx_h = meson.current_source_dir() / 'gen_tcg_hvx.h' idef_parser_dir = meson.current_source_dir() / 'idef-parser' @@ -42,28 +41,17 @@ hexagon_ss.add(semantics_generated) # # Step 2 # We use Python scripts to generate the following files -# shortcode_generated.h.inc # tcg_func_table_generated.c.inc # printinsn_generated.h.inc -# op_regs_generated.h.inc # op_attribs_generated.h.inc # opcodes_def_generated.h.inc # -shortcode_generated = custom_target( - 'shortcode_generated.h.inc', - output: 'shortcode_generated.h.inc', - depends: [semantics_generated], - depend_files: [hex_common_py, attribs_def], - command: [python, files('gen_shortcode.py'), semantics_generated, attribs_def, '@OUTPUT@'], -) -hexagon_ss.add(shortcode_generated) - tcg_func_table_generated = custom_target( 'tcg_func_table_generated.c.inc', output: 'tcg_func_table_generated.c.inc', depends: [semantics_generated], - depend_files: [hex_common_py, attribs_def], - command: [python, files('gen_tcg_func_table.py'), semantics_generated, attribs_def, '@OUTPUT@'], + depend_files: [hex_common_py], + command: [python, files('gen_tcg_func_table.py'), semantics_generated, '@OUTPUT@'], ) hexagon_ss.add(tcg_func_table_generated) @@ -71,26 +59,17 @@ printinsn_generated = custom_target( 'printinsn_generated.h.inc', output: 'printinsn_generated.h.inc', depends: [semantics_generated], - depend_files: [hex_common_py, attribs_def], - command: [python, files('gen_printinsn.py'), semantics_generated, attribs_def, '@OUTPUT@'], + depend_files: [hex_common_py], + command: [python, files('gen_printinsn.py'), semantics_generated, '@OUTPUT@'], ) hexagon_ss.add(printinsn_generated) -op_regs_generated = custom_target( - 'op_regs_generated.h.inc', - output: 'op_regs_generated.h.inc', - depends: [semantics_generated], - depend_files: [hex_common_py, attribs_def], - command: [python, files('gen_op_regs.py'), semantics_generated, attribs_def, '@OUTPUT@'], -) -hexagon_ss.add(op_regs_generated) - op_attribs_generated = custom_target( 'op_attribs_generated.h.inc', output: 'op_attribs_generated.h.inc', depends: [semantics_generated], - depend_files: [hex_common_py, attribs_def], - command: [python, files('gen_op_attribs.py'), semantics_generated, attribs_def, '@OUTPUT@'], + depend_files: [hex_common_py], + command: [python, files('gen_op_attribs.py'), semantics_generated, '@OUTPUT@'], ) hexagon_ss.add(op_attribs_generated) @@ -98,8 +77,8 @@ opcodes_def_generated = custom_target( 'opcodes_def_generated.h.inc', output: 'opcodes_def_generated.h.inc', depends: [semantics_generated], - depend_files: [hex_common_py, attribs_def], - command: [python, files('gen_opcodes_def.py'), semantics_generated, attribs_def, '@OUTPUT@'], + depend_files: [hex_common_py], + command: [python, files('gen_opcodes_def.py'), semantics_generated, '@OUTPUT@'], ) hexagon_ss.add(opcodes_def_generated) @@ -110,7 +89,7 @@ hexagon_ss.add(opcodes_def_generated) # gen_dectree_import = executable( 'gen_dectree_import', - 'gen_dectree_import.c', opcodes_def_generated, op_regs_generated, + 'gen_dectree_import.c', opcodes_def_generated, native: true, build_by_default: false) iset_py = custom_target( @@ -298,7 +277,7 @@ if idef_parser_enabled and 
'hexagon-linux-user' in target_dirs output: 'idef_parser_input.h.inc', depends: [semantics_generated], depend_files: [hex_common_py], - command: [python, files('gen_idef_parser_funcs.py'), semantics_generated, attribs_def, '@OUTPUT@'], + command: [python, files('gen_idef_parser_funcs.py'), semantics_generated, '@OUTPUT@'], ) preprocessed_idef_parser_input_generated = custom_target( @@ -367,12 +346,12 @@ if idef_parser_enabled and 'hexagon-linux-user' in target_dirs # Setup input and dependencies for the next step, this depends on whether or # not idef-parser is enabled helper_dep = [semantics_generated, idef_generated_tcg_c, idef_generated_tcg] - helper_in = [semantics_generated, attribs_def, gen_tcg_h, gen_tcg_hvx_h, idef_generated_list] + helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h, idef_generated_list] else # Setup input and dependencies for the next step, this depends on whether or # not idef-parser is enabled helper_dep = [semantics_generated] - helper_in = [semantics_generated, attribs_def, gen_tcg_h, gen_tcg_hvx_h] + helper_in = [semantics_generated, gen_tcg_h, gen_tcg_hvx_h] endif # @@ -386,7 +365,7 @@ helper_protos_generated = custom_target( 'helper_protos_generated.h.inc', output: 'helper_protos_generated.h.inc', depends: helper_dep, - depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], command: [python, files('gen_helper_protos.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(helper_protos_generated) @@ -395,7 +374,7 @@ helper_funcs_generated = custom_target( 'helper_funcs_generated.c.inc', output: 'helper_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], command: [python, files('gen_helper_funcs.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(helper_funcs_generated) @@ -404,7 +383,7 @@ tcg_funcs_generated = custom_target( 'tcg_funcs_generated.c.inc', output: 'tcg_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], command: [python, files('gen_tcg_funcs.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(tcg_funcs_generated) @@ -413,7 +392,7 @@ analyze_funcs_generated = custom_target( 'analyze_funcs_generated.c.inc', output: 'analyze_funcs_generated.c.inc', depends: helper_dep, - depend_files: [hex_common_py, attribs_def, gen_tcg_h, gen_tcg_hvx_h], + depend_files: [hex_common_py, gen_tcg_h, gen_tcg_hvx_h], command: [python, files('gen_analyze_funcs.py'), helper_in, '@OUTPUT@'], ) hexagon_ss.add(analyze_funcs_generated) diff --git a/target/hexagon/mmvec/decode_ext_mmvec.c b/target/hexagon/mmvec/decode_ext_mmvec.c index 202d84c7c0..f850d0154d 100644 --- a/target/hexagon/mmvec/decode_ext_mmvec.c +++ b/target/hexagon/mmvec/decode_ext_mmvec.c @@ -28,19 +28,15 @@ check_new_value(Packet *pkt) { /* .new value for a MMVector store */ int i, j; - const char *reginfo; - const char *destletters; - const char *dststr = NULL; uint16_t def_opcode; - char letter; for (i = 1; i < pkt->num_insns; i++) { uint16_t use_opcode = pkt->insn[i].opcode; if (GET_ATTRIB(use_opcode, A_DOTNEWVALUE) && GET_ATTRIB(use_opcode, A_CVI) && GET_ATTRIB(use_opcode, A_STORE)) { - int use_regidx = strchr(opcode_reginfo[use_opcode], 's') - - opcode_reginfo[use_opcode]; + int use_regidx = pkt->insn[i].new_read_idx; + g_assert(pkt->insn[i].new_read_idx != -1); /* * What's encoded at the N-field 
is the offset to who's producing * the value. @@ -68,31 +64,19 @@ check_new_value(Packet *pkt) /* def_idx is the index of the producer */ def_opcode = pkt->insn[def_idx].opcode; - reginfo = opcode_reginfo[def_opcode]; - destletters = "dexy"; - for (j = 0; (letter = destletters[j]) != 0; j++) { - dststr = strchr(reginfo, letter); - if (dststr != NULL) { - break; - } - } - if ((dststr == NULL) && GET_ATTRIB(def_opcode, A_CVI_GATHER)) { + if ((pkt->insn[def_idx].dest_idx == -1) && + GET_ATTRIB(def_opcode, A_CVI_GATHER)) { pkt->insn[i].regno[use_regidx] = def_oreg; pkt->insn[i].new_value_producer_slot = pkt->insn[def_idx].slot; } else { - if (dststr == NULL) { + if (pkt->insn[def_idx].dest_idx == -1) { /* still not there, we have a bad packet */ g_assert_not_reached(); } - int def_regnum = pkt->insn[def_idx].regno[dststr - reginfo]; + int def_regnum = + pkt->insn[def_idx].regno[pkt->insn[def_idx].dest_idx]; /* Now patch up the consumer with the register number */ pkt->insn[i].regno[use_regidx] = def_regnum ^ def_oreg; - /* special case for (Vx,Vy) */ - dststr = strchr(reginfo, 'y'); - if (def_oreg && strchr(reginfo, 'x') && dststr) { - def_regnum = pkt->insn[def_idx].regno[dststr - reginfo]; - pkt->insn[i].regno[use_regidx] = def_regnum; - } /* * We need to remember who produces this value to later * check if it was dynamically cancelled diff --git a/target/hexagon/opcodes.c b/target/hexagon/opcodes.c index 1f7f3def38..c8bde2f9e9 100644 --- a/target/hexagon/opcodes.c +++ b/target/hexagon/opcodes.c @@ -36,41 +36,6 @@ const char * const opcode_names[] = { #undef OPCODE }; -const char * const opcode_reginfo[] = { -#define IMMINFO(TAG, SIGN, SIZE, SHAMT, SIGN2, SIZE2, SHAMT2) /* nothing */ -#define REGINFO(TAG, REGINFO, RREGS, WREGS) REGINFO, -#include "op_regs_generated.h.inc" - NULL -#undef REGINFO -#undef IMMINFO -}; - - -const char * const opcode_rregs[] = { -#define IMMINFO(TAG, SIGN, SIZE, SHAMT, SIGN2, SIZE2, SHAMT2) /* nothing */ -#define REGINFO(TAG, REGINFO, RREGS, WREGS) RREGS, -#include "op_regs_generated.h.inc" - NULL -#undef REGINFO -#undef IMMINFO -}; - - -const char * const opcode_wregs[] = { -#define IMMINFO(TAG, SIGN, SIZE, SHAMT, SIGN2, SIZE2, SHAMT2) /* nothing */ -#define REGINFO(TAG, REGINFO, RREGS, WREGS) WREGS, -#include "op_regs_generated.h.inc" - NULL -#undef REGINFO -#undef IMMINFO -}; - -const char * const opcode_short_semantics[] = { -#define DEF_SHORTCODE(TAG, SHORTCODE) [TAG] = #SHORTCODE, -#include "shortcode_generated.h.inc" -#undef DEF_SHORTCODE - NULL -}; DECLARE_BITMAP(opcode_attribs[XX_LAST_OPCODE], A_ZZ_LASTATTRIB); diff --git a/target/hexagon/opcodes.h b/target/hexagon/opcodes.h index fa7e321950..0ee11bd445 100644 --- a/target/hexagon/opcodes.h +++ b/target/hexagon/opcodes.h @@ -40,10 +40,6 @@ typedef enum { extern const char * const opcode_names[]; -extern const char * const opcode_reginfo[]; -extern const char * const opcode_rregs[]; -extern const char * const opcode_wregs[]; - typedef struct { const char * const encoding; const EncClass enc_class; diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c index f163eefe97..4b1bee3c6d 100644 --- a/target/hexagon/translate.c +++ b/target/hexagon/translate.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #include "exec/helper-gen.h" #include "exec/helper-proto.h" #include "exec/translation-block.h" +#include "exec/cpu_ldst.h" #include "exec/log.h" #include "internal.h" #include "attribs.h" @@ -379,70 +380,8 @@ static bool need_commit(DisasContext *ctx) return true; } - if (pkt->num_insns == 1) { - if (pkt->pkt_has_hvx) { - /* - * The HVX instructions with generated helpers use - * pass-by-reference, so they need the read/write overlap - * check below. - * The HVX instructions with overrides are OK. - */ - if (!ctx->has_hvx_helper) { - return false; - } - } else { - return false; - } - } - - /* Check for overlap between register reads and writes */ - for (int i = 0; i < ctx->reg_log_idx; i++) { - int rnum = ctx->reg_log[i]; - if (test_bit(rnum, ctx->regs_read)) { - return true; - } - } - - /* Check for overlap between predicate reads and writes */ - for (int i = 0; i < ctx->preg_log_idx; i++) { - int pnum = ctx->preg_log[i]; - if (test_bit(pnum, ctx->pregs_read)) { - return true; - } - } - - /* Check for overlap between HVX reads and writes */ - for (int i = 0; i < ctx->vreg_log_idx; i++) { - int vnum = ctx->vreg_log[i]; - if (test_bit(vnum, ctx->vregs_read)) { - return true; - } - } - if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) { - int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS); - while (i < NUM_VREGS) { - if (test_bit(i, ctx->vregs_read)) { - return true; - } - i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1); - } - } - if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) { - int i = find_first_bit(ctx->vregs_select, NUM_VREGS); - while (i < NUM_VREGS) { - if (test_bit(i, ctx->vregs_read)) { - return true; - } - i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1); - } - } - - /* Check for overlap between HVX predicate reads and writes */ - for (int i = 0; i < ctx->qreg_log_idx; i++) { - int qnum = ctx->qreg_log[i]; - if (test_bit(qnum, ctx->qregs_read)) { - return true; - } + if (ctx->read_after_write || ctx->has_hvx_overlap) { + return true; } return false; @@ -466,7 +405,8 @@ static void mark_implicit_pred_reads(DisasContext *ctx) static void analyze_packet(DisasContext *ctx) { Packet *pkt = ctx->pkt; - ctx->has_hvx_helper = false; + ctx->read_after_write = false; + ctx->has_hvx_overlap = false; for (int i = 0; i < pkt->num_insns; i++) { Insn *insn = &pkt->insn[i]; ctx->insn = insn; @@ -491,21 +431,19 @@ static void gen_start_packet(DisasContext *ctx) ctx->next_PC = next_PC; ctx->reg_log_idx = 0; bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS); - bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS); bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS); ctx->preg_log_idx = 0; bitmap_zero(ctx->pregs_written, NUM_PREGS); - bitmap_zero(ctx->pregs_read, NUM_PREGS); ctx->future_vregs_idx = 0; ctx->tmp_vregs_idx = 0; ctx->vreg_log_idx = 0; + bitmap_zero(ctx->vregs_written, NUM_VREGS); bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS); bitmap_zero(ctx->vregs_updated, NUM_VREGS); bitmap_zero(ctx->vregs_select, NUM_VREGS); bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS); bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS); - bitmap_zero(ctx->vregs_read, NUM_VREGS); - bitmap_zero(ctx->qregs_read, NUM_QREGS); + bitmap_zero(ctx->qregs_written, NUM_QREGS); ctx->qreg_log_idx = 0; for (i = 0; i < STORES_MAX; i++) { ctx->store_width[i] = 0; @@ -1084,7 +1022,7 @@ static bool pkt_crosses_page(CPUHexagonState 
*env, DisasContext *ctx) int nwords; for (nwords = 0; !found_end && nwords < PACKET_WORDS_MAX; nwords++) { - uint32_t word = cpu_ldl_code(env, + uint32_t word = translator_ldl(env, &ctx->base, ctx->base.pc_next + nwords * sizeof(uint32_t)); found_end = is_packet_end(word); } @@ -1137,21 +1075,12 @@ static void hexagon_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void hexagon_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - - static const TranslatorOps hexagon_tr_ops = { .init_disas_context = hexagon_tr_init_disas_context, .tb_start = hexagon_tr_tb_start, .insn_start = hexagon_tr_insn_start, .translate_insn = hexagon_tr_translate_packet, .tb_stop = hexagon_tr_tb_stop, - .disas_log = hexagon_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h index 4dd59c6726..00cc2bcd63 100644 --- a/target/hexagon/translate.h +++ b/target/hexagon/translate.h @@ -1,5 +1,5 @@ /* - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved. + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -38,12 +38,10 @@ typedef struct DisasContext { int reg_log[REG_WRITES_MAX]; int reg_log_idx; DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS); - DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS); DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS); int preg_log[PRED_WRITES_MAX]; int preg_log_idx; DECLARE_BITMAP(pregs_written, NUM_PREGS); - DECLARE_BITMAP(pregs_read, NUM_PREGS); uint8_t store_width[STORES_MAX]; bool s1_store_processed; int future_vregs_idx; @@ -52,22 +50,27 @@ typedef struct DisasContext { int tmp_vregs_num[VECTOR_TEMPS_MAX]; int vreg_log[NUM_VREGS]; int vreg_log_idx; + DECLARE_BITMAP(vregs_written, NUM_VREGS); + DECLARE_BITMAP(insn_vregs_written, NUM_VREGS); DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS); DECLARE_BITMAP(vregs_updated, NUM_VREGS); DECLARE_BITMAP(vregs_select, NUM_VREGS); DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS); DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS); - DECLARE_BITMAP(vregs_read, NUM_VREGS); + DECLARE_BITMAP(insn_vregs_read, NUM_VREGS); int qreg_log[NUM_QREGS]; int qreg_log_idx; - DECLARE_BITMAP(qregs_read, NUM_QREGS); + DECLARE_BITMAP(qregs_written, NUM_QREGS); + DECLARE_BITMAP(insn_qregs_written, NUM_QREGS); + DECLARE_BITMAP(insn_qregs_read, NUM_QREGS); bool pre_commit; bool need_commit; TCGCond branch_cond; target_ulong branch_dest; bool is_tight_loop; bool short_circuit; - bool has_hvx_helper; + bool read_after_write; + bool has_hvx_overlap; TCGv new_value[TOTAL_PER_THREAD_REGS]; TCGv new_pred_value[NUM_PREGS]; TCGv pred_written; @@ -75,6 +78,8 @@ typedef struct DisasContext { TCGv dczero_addr; } DisasContext; +bool is_gather_store_insn(DisasContext *ctx); + static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) { if (!test_bit(pnum, ctx->pregs_written)) { @@ -86,7 +91,14 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum) static inline void ctx_log_pred_read(DisasContext *ctx, int pnum) { - set_bit(pnum, ctx->pregs_read); + if (test_bit(pnum, ctx->pregs_written)) { + ctx->read_after_write = true; + } +} + +static inline void 
ctx_log_pred_read_new(DisasContext *ctx, int pnum) +{ + g_assert(test_bit(pnum, ctx->pregs_written)); } static inline void ctx_log_reg_write(DisasContext *ctx, int rnum, @@ -117,7 +129,14 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum, static inline void ctx_log_reg_read(DisasContext *ctx, int rnum) { - set_bit(rnum, ctx->regs_read); + if (test_bit(rnum, ctx->regs_written)) { + ctx->read_after_write = true; + } +} + +static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum) +{ + g_assert(test_bit(rnum, ctx->regs_written)); } static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum) @@ -131,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum, intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum, int num, bool alloc_ok); +static inline void ctx_start_hvx_insn(DisasContext *ctx) +{ + bitmap_zero(ctx->insn_vregs_written, NUM_VREGS); + bitmap_zero(ctx->insn_vregs_read, NUM_VREGS); + bitmap_zero(ctx->insn_qregs_written, NUM_QREGS); + bitmap_zero(ctx->insn_qregs_read, NUM_QREGS); +} + static inline void ctx_log_vreg_write(DisasContext *ctx, int rnum, VRegWriteType type, - bool is_predicated) + bool is_predicated, bool has_helper) { + if (has_helper) { + set_bit(rnum, ctx->insn_vregs_written); + if (test_bit(rnum, ctx->insn_vregs_read)) { + ctx->has_hvx_overlap = true; + } + } + set_bit(rnum, ctx->vregs_written); if (type != EXT_TMP) { if (!test_bit(rnum, ctx->vregs_updated)) { ctx->vreg_log[ctx->vreg_log_idx] = rnum; @@ -160,33 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext *ctx, static inline void ctx_log_vreg_write_pair(DisasContext *ctx, int rnum, VRegWriteType type, - bool is_predicated) + bool is_predicated, bool has_helper) { - ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated); - ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated); + ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper); + ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper); } -static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum) +static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum, + bool has_helper) { - set_bit(rnum, ctx->vregs_read); + if (has_helper) { + set_bit(rnum, ctx->insn_vregs_read); + if (test_bit(rnum, ctx->insn_vregs_written)) { + ctx->has_hvx_overlap = true; + } + } + if (test_bit(rnum, ctx->vregs_written)) { + ctx->read_after_write = true; + } } -static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum) +static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum, + bool has_helper) { - ctx_log_vreg_read(ctx, rnum ^ 0); - ctx_log_vreg_read(ctx, rnum ^ 1); + g_assert(is_gather_store_insn(ctx) || + test_bit(rnum, ctx->vregs_updated) || + test_bit(rnum, ctx->vregs_select) || + test_bit(rnum, ctx->vregs_updated_tmp)); + if (has_helper) { + set_bit(rnum, ctx->insn_vregs_read); + if (test_bit(rnum, ctx->insn_vregs_written)) { + ctx->has_hvx_overlap = true; + } + } + if (is_gather_store_insn(ctx)) { + ctx->read_after_write = true; + } +} + +static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum, + bool has_helper) +{ + ctx_log_vreg_read(ctx, rnum ^ 0, has_helper); + ctx_log_vreg_read(ctx, rnum ^ 1, has_helper); } static inline void ctx_log_qreg_write(DisasContext *ctx, - int rnum) + int rnum, bool has_helper) { + if (has_helper) { + set_bit(rnum, ctx->insn_qregs_written); + if (test_bit(rnum, ctx->insn_qregs_read)) { + ctx->has_hvx_overlap = true; + } + } + set_bit(rnum, ctx->qregs_written); ctx->qreg_log[ctx->qreg_log_idx] = rnum; 
ctx->qreg_log_idx++; } -static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum) +static inline void ctx_log_qreg_read(DisasContext *ctx, + int qnum, bool has_helper) { - set_bit(qnum, ctx->qregs_read); + if (has_helper) { + set_bit(qnum, ctx->insn_qregs_read); + if (test_bit(qnum, ctx->insn_qregs_written)) { + ctx->has_hvx_overlap = true; + } + } + if (test_bit(qnum, ctx->qregs_written)) { + ctx->read_after_write = true; + } } extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS]; @@ -205,7 +283,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX]; extern TCGv hex_vstore_size[VSTORES_MAX]; extern TCGv hex_vstore_pending[VSTORES_MAX]; -bool is_gather_store_insn(DisasContext *ctx); void process_store(DisasContext *ctx, int slot_num); FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2) diff --git a/target/hppa/cpu-param.h b/target/hppa/cpu-param.h index bb3d7ef6f7..473d489f01 100644 --- a/target/hppa/cpu-param.h +++ b/target/hppa/cpu-param.h @@ -21,4 +21,12 @@ #define TARGET_PAGE_BITS 12 +/* PA-RISC 1.x processors have a strong memory model. */ +/* + * ??? While we do not yet implement PA-RISC 2.0, those processors have + * a weak memory model, but with TLB bits that force ordering on a per-page + * basis. It's probably easier to fall back to a strong memory model. + */ +#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL + #endif diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c index 3831cb6db2..f0507874ce 100644 --- a/target/hppa/cpu.c +++ b/target/hppa/cpu.c @@ -32,61 +32,96 @@ static void hppa_cpu_set_pc(CPUState *cs, vaddr value) { HPPACPU *cpu = HPPA_CPU(cs); +#ifdef CONFIG_USER_ONLY + value |= PRIV_USER; +#endif cpu->env.iaoq_f = value; cpu->env.iaoq_b = value + 4; } static vaddr hppa_cpu_get_pc(CPUState *cs) { - HPPACPU *cpu = HPPA_CPU(cs); + CPUHPPAState *env = cpu_env(cs); - return cpu->env.iaoq_f; + return hppa_form_gva_psw(env->psw, (env->psw & PSW_C ? env->iasq_f : 0), + env->iaoq_f & -4); } -static void hppa_cpu_synchronize_from_tb(CPUState *cs, - const TranslationBlock *tb) +void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, + uint64_t *pcsbase, uint32_t *pflags) { - HPPACPU *cpu = HPPA_CPU(cs); + uint32_t flags = 0; + uint64_t cs_base = 0; + + /* + * TB lookup assumes that PC contains the complete virtual address. + * If we leave space+offset separate, we'll get ITLB misses to an + * incomplete virtual address. This also means that we must separate + * out current cpu privilege from the low bits of IAOQ_F. + */ + *pc = hppa_cpu_get_pc(env_cpu(env)); + flags |= (env->iaoq_f & 3) << TB_FLAG_PRIV_SHIFT; + + /* + * The only really interesting case is if IAQ_Back is on the same page + * as IAQ_Front, so that we can use goto_tb between the blocks. In all + * other cases, we'll be ending the TranslationBlock with one insn and + * not linking between them. + */ + if (env->iasq_f != env->iasq_b) { + cs_base |= CS_BASE_DIFFSPACE; + } else if ((env->iaoq_f ^ env->iaoq_b) & TARGET_PAGE_MASK) { + cs_base |= CS_BASE_DIFFPAGE; + } else { + cs_base |= env->iaoq_b & ~TARGET_PAGE_MASK; + } - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + /* ??? E, T, H, L bits need to be here, when implemented. */ + flags |= env->psw_n * PSW_N; + flags |= env->psw_xb; + flags |= env->psw & (PSW_W | PSW_C | PSW_D | PSW_P); #ifdef CONFIG_USER_ONLY - cpu->env.iaoq_f = tb->pc; - cpu->env.iaoq_b = tb->cs_base; + flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus; #else - /* Recover the IAOQ values from the GVA + PRIV. 
*/ - uint32_t priv = (tb->flags >> TB_FLAG_PRIV_SHIFT) & 3; - target_ulong cs_base = tb->cs_base; - target_ulong iasq_f = cs_base & ~0xffffffffull; - int32_t diff = cs_base; - - cpu->env.iasq_f = iasq_f; - cpu->env.iaoq_f = (tb->pc & ~iasq_f) + priv; - if (diff) { - cpu->env.iaoq_b = cpu->env.iaoq_f + diff; + if ((env->sr[4] == env->sr[5]) + & (env->sr[4] == env->sr[6]) + & (env->sr[4] == env->sr[7])) { + flags |= TB_FLAG_SR_SAME; } #endif + *pcsbase = cs_base; + *pflags = flags; +} + +static void hppa_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + HPPACPU *cpu = HPPA_CPU(cs); + + /* IAQ is always up-to-date before goto_tb. */ cpu->env.psw_n = (tb->flags & PSW_N) != 0; + cpu->env.psw_xb = tb->flags & (PSW_X | PSW_B); } static void hppa_restore_state_to_opc(CPUState *cs, const TranslationBlock *tb, const uint64_t *data) { - HPPACPU *cpu = HPPA_CPU(cs); + CPUHPPAState *env = cpu_env(cs); - cpu->env.iaoq_f = data[0]; - if (data[1] != (target_ulong)-1) { - cpu->env.iaoq_b = data[1]; + env->iaoq_f = (env->iaoq_f & TARGET_PAGE_MASK) | data[0]; + if (data[1] != INT32_MIN) { + env->iaoq_b = env->iaoq_f + data[1]; } - cpu->env.unwind_breg = data[2]; + env->unwind_breg = data[2]; /* * Since we were executing the instruction at IAOQ_F, and took some * sort of action that provoked the cpu_restore_state, we can infer * that the instruction was not nullified. */ - cpu->env.psw_n = 0; + env->psw_n = 0; } static bool hppa_cpu_has_work(CPUState *cs) @@ -152,6 +187,9 @@ static void hppa_cpu_realizefn(DeviceState *dev, Error **errp) hppa_ptlbe(&cpu->env); } #endif + + /* Use pc-relative instructions always to simplify the translator. */ + tcg_cflags_set(cs, CF_PCREL); } static void hppa_cpu_initfn(Object *obj) diff --git a/target/hppa/cpu.h b/target/hppa/cpu.h index a072d0bb63..2bcb3b602b 100644 --- a/target/hppa/cpu.h +++ b/target/hppa/cpu.h @@ -24,12 +24,7 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" #include "qemu/interval-tree.h" - -/* PA-RISC 1.x processors have a strong memory model. */ -/* ??? While we do not yet implement PA-RISC 2.0, those processors have - a weak memory model, but with TLB bits that force ordering on a per-page - basis. It's probably easier to fall back to a strong memory model. 
*/ -#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL +#include "hw/registerfields.h" #define MMU_ABS_W_IDX 6 #define MMU_ABS_IDX 7 @@ -47,6 +42,9 @@ #define MMU_IDX_TO_P(MIDX) (((MIDX) - MMU_KERNEL_IDX) & 1) #define PRIV_P_TO_MMU_IDX(PRIV, P) ((PRIV) * 2 + !!(P) + MMU_KERNEL_IDX) +#define PRIV_KERNEL 0 +#define PRIV_USER 3 + #define TARGET_INSN_START_EXTRA_WORDS 2 /* No need to flush MMU_ABS*_IDX */ @@ -158,6 +156,30 @@ #define CR_IPSW 22 #define CR_EIRR 23 +FIELD(FPSR, ENA_I, 0, 1) +FIELD(FPSR, ENA_U, 1, 1) +FIELD(FPSR, ENA_O, 2, 1) +FIELD(FPSR, ENA_Z, 3, 1) +FIELD(FPSR, ENA_V, 4, 1) +FIELD(FPSR, ENABLES, 0, 5) +FIELD(FPSR, D, 5, 1) +FIELD(FPSR, T, 6, 1) +FIELD(FPSR, RM, 9, 2) +FIELD(FPSR, CQ, 11, 11) +FIELD(FPSR, CQ0_6, 15, 7) +FIELD(FPSR, CQ0_4, 17, 5) +FIELD(FPSR, CQ0_2, 19, 3) +FIELD(FPSR, CQ0, 21, 1) +FIELD(FPSR, CA, 15, 7) +FIELD(FPSR, CA0, 21, 1) +FIELD(FPSR, C, 26, 1) +FIELD(FPSR, FLG_I, 27, 1) +FIELD(FPSR, FLG_U, 28, 1) +FIELD(FPSR, FLG_O, 29, 1) +FIELD(FPSR, FLG_Z, 30, 1) +FIELD(FPSR, FLG_V, 31, 1) +FIELD(FPSR, FLAGS, 27, 5) + typedef struct HPPATLBEntry { union { IntervalTreeNode itree; @@ -186,7 +208,8 @@ typedef struct CPUArchState { uint64_t fr[32]; uint64_t sr[8]; /* stored shifted into place for gva */ - target_ulong psw; /* All psw bits except the following: */ + uint32_t psw; /* All psw bits except the following: */ + uint32_t psw_xb; /* X and B, in their normal positions */ target_ulong psw_n; /* boolean */ target_long psw_v; /* in most significant bit */ @@ -319,48 +342,11 @@ hwaddr hppa_abs_to_phys_pa2_w1(vaddr addr); #define TB_FLAG_SR_SAME PSW_I #define TB_FLAG_PRIV_SHIFT 8 #define TB_FLAG_UNALIGN 0x400 +#define CS_BASE_DIFFPAGE (1 << 12) +#define CS_BASE_DIFFSPACE (1 << 13) -static inline void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *pflags) -{ - uint32_t flags = env->psw_n * PSW_N; - - /* TB lookup assumes that PC contains the complete virtual address. - If we leave space+offset separate, we'll get ITLB misses to an - incomplete virtual address. This also means that we must separate - out current cpu privilege from the low bits of IAOQ_F. */ -#ifdef CONFIG_USER_ONLY - *pc = env->iaoq_f & -4; - *cs_base = env->iaoq_b & -4; - flags |= TB_FLAG_UNALIGN * !env_cpu(env)->prctl_unalign_sigbus; -#else - /* ??? E, T, H, L, B bits need to be here, when implemented. */ - flags |= env->psw & (PSW_W | PSW_C | PSW_D | PSW_P); - flags |= (env->iaoq_f & 3) << TB_FLAG_PRIV_SHIFT; - - *pc = hppa_form_gva_psw(env->psw, (env->psw & PSW_C ? env->iasq_f : 0), - env->iaoq_f & -4); - *cs_base = env->iasq_f; - - /* Insert a difference between IAOQ_B and IAOQ_F within the otherwise zero - low 32-bits of CS_BASE. This will succeed for all direct branches, - which is the primary case we care about -- using goto_tb within a page. - Failure is indicated by a zero difference. 
*/ - if (env->iasq_f == env->iasq_b) { - target_long diff = env->iaoq_b - env->iaoq_f; - if (diff == (int32_t)diff) { - *cs_base |= (uint32_t)diff; - } - } - if ((env->sr[4] == env->sr[5]) - & (env->sr[4] == env->sr[6]) - & (env->sr[4] == env->sr[7])) { - flags |= TB_FLAG_SR_SAME; - } -#endif - - *pflags = flags; -} +void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags); target_ulong cpu_hppa_get_psw(CPUHPPAState *env); void cpu_hppa_put_psw(CPUHPPAState *env, target_ulong); @@ -385,8 +371,7 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr address, int size, void hppa_cpu_do_interrupt(CPUState *cpu); bool hppa_cpu_exec_interrupt(CPUState *cpu, int int_req); int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx, - int type, hwaddr *pphys, int *pprot, - HPPATLBEntry **tlb_entry); + int type, hwaddr *pphys, int *pprot); void hppa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, unsigned size, MMUAccessType access_type, @@ -395,7 +380,6 @@ void hppa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, extern const MemoryRegionOps hppa_io_eir_ops; extern const VMStateDescription vmstate_hppa_cpu; void hppa_cpu_alarm_timer(void *); -int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr); #endif G_NORETURN void hppa_dynamic_excp(CPUHPPAState *env, int excp, uintptr_t ra); diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c index 576f283b04..deaed2b65d 100644 --- a/target/hppa/fpu_helper.c +++ b/target/hppa/fpu_helper.c @@ -30,7 +30,7 @@ void HELPER(loaded_fr0)(CPUHPPAState *env) env->fr0_shadow = shadow; - switch (extract32(shadow, 9, 2)) { + switch (FIELD_EX32(shadow, FPSR, RM)) { default: rm = float_round_nearest_even; break; @@ -46,7 +46,7 @@ void HELPER(loaded_fr0)(CPUHPPAState *env) } set_float_rounding_mode(rm, &env->fp_status); - d = extract32(shadow, 5, 1); + d = FIELD_EX32(shadow, FPSR, D); set_flush_to_zero(d, &env->fp_status); set_flush_inputs_to_zero(d, &env->fp_status); } @@ -57,7 +57,7 @@ void cpu_hppa_loaded_fr0(CPUHPPAState *env) } #define CONVERT_BIT(X, SRC, DST) \ - ((SRC) > (DST) \ + ((unsigned)(SRC) > (unsigned)(DST) \ ? 
(X) / ((SRC) / (DST)) & (DST) \ : ((X) & (SRC)) * ((DST) / (SRC))) @@ -73,12 +73,12 @@ static void update_fr0_op(CPUHPPAState *env, uintptr_t ra) } set_float_exception_flags(0, &env->fp_status); - hard_exp |= CONVERT_BIT(soft_exp, float_flag_inexact, 1u << 0); - hard_exp |= CONVERT_BIT(soft_exp, float_flag_underflow, 1u << 1); - hard_exp |= CONVERT_BIT(soft_exp, float_flag_overflow, 1u << 2); - hard_exp |= CONVERT_BIT(soft_exp, float_flag_divbyzero, 1u << 3); - hard_exp |= CONVERT_BIT(soft_exp, float_flag_invalid, 1u << 4); - shadow |= hard_exp << (32 - 5); + hard_exp |= CONVERT_BIT(soft_exp, float_flag_inexact, R_FPSR_ENA_I_MASK); + hard_exp |= CONVERT_BIT(soft_exp, float_flag_underflow, R_FPSR_ENA_U_MASK); + hard_exp |= CONVERT_BIT(soft_exp, float_flag_overflow, R_FPSR_ENA_O_MASK); + hard_exp |= CONVERT_BIT(soft_exp, float_flag_divbyzero, R_FPSR_ENA_Z_MASK); + hard_exp |= CONVERT_BIT(soft_exp, float_flag_invalid, R_FPSR_ENA_V_MASK); + shadow |= hard_exp << (R_FPSR_FLAGS_SHIFT - R_FPSR_ENABLES_SHIFT); env->fr0_shadow = shadow; env->fr[0] = (uint64_t)shadow << 32; @@ -378,15 +378,15 @@ static void update_fr0_cmp(CPUHPPAState *env, uint32_t y, if (y) { /* targeted comparison */ /* set fpsr[ca[y - 1]] to current compare */ - shadow = deposit32(shadow, 21 - (y - 1), 1, c); + shadow = deposit32(shadow, R_FPSR_CA0_SHIFT - (y - 1), 1, c); } else { /* queued comparison */ /* shift cq right by one place */ - shadow = deposit32(shadow, 11, 10, extract32(shadow, 12, 10)); + shadow = (shadow & ~R_FPSR_CQ_MASK) | ((shadow >> 1) & R_FPSR_CQ_MASK); /* move fpsr[c] to fpsr[cq[0]] */ - shadow = deposit32(shadow, 21, 1, extract32(shadow, 26, 1)); + shadow = FIELD_DP32(shadow, FPSR, CQ0, FIELD_EX32(shadow, FPSR, C)); /* set fpsr[c] to current compare */ - shadow = deposit32(shadow, 26, 1, c); + shadow = FIELD_DP32(shadow, FPSR, C, c); } env->fr0_shadow = shadow; diff --git a/target/hppa/gdbstub.c b/target/hppa/gdbstub.c index 4a965b38d7..0daa52f7af 100644 --- a/target/hppa/gdbstub.c +++ b/target/hppa/gdbstub.c @@ -163,12 +163,18 @@ int hppa_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) env->cr[CR_SAR] = val & (hppa_is_pa20(env) ? 
63 : 31); break; case 33: +#ifdef CONFIG_USER_ONLY + val |= PRIV_USER; +#endif env->iaoq_f = val; break; case 34: env->iasq_f = (uint64_t)val << 32; break; case 35: +#ifdef CONFIG_USER_ONLY + val |= PRIV_USER; +#endif env->iaoq_b = val; break; case 36: diff --git a/target/hppa/helper.c b/target/hppa/helper.c index 9d217d051c..b79ddd8184 100644 --- a/target/hppa/helper.c +++ b/target/hppa/helper.c @@ -54,7 +54,7 @@ target_ulong cpu_hppa_get_psw(CPUHPPAState *env) psw |= env->psw_n * PSW_N; psw |= (env->psw_v < 0) * PSW_V; - psw |= env->psw; + psw |= env->psw | env->psw_xb; return psw; } @@ -76,8 +76,8 @@ void cpu_hppa_put_psw(CPUHPPAState *env, target_ulong psw) } psw &= ~reserved; - env->psw = psw & (uint32_t)~(PSW_N | PSW_V | PSW_CB); - + env->psw = psw & (uint32_t)~(PSW_B | PSW_N | PSW_V | PSW_X | PSW_CB); + env->psw_xb = psw & (PSW_X | PSW_B); env->psw_n = (psw / PSW_N) & 1; env->psw_v = -((psw / PSW_V) & 1); @@ -102,6 +102,19 @@ void cpu_hppa_put_psw(CPUHPPAState *env, target_ulong psw) void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags) { +#ifndef CONFIG_USER_ONLY + static const char cr_name[32][5] = { + "RC", "CR1", "CR2", "CR3", + "CR4", "CR5", "CR6", "CR7", + "PID1", "PID2", "CCR", "SAR", + "PID3", "PID4", "IVA", "EIEM", + "ITMR", "ISQF", "IOQF", "IIR", + "ISR", "IOR", "IPSW", "EIRR", + "TR0", "TR1", "TR2", "TR3", + "TR4", "TR5", "TR6", "TR7", + }; +#endif + CPUHPPAState *env = cpu_env(cs); target_ulong psw = cpu_hppa_get_psw(env); target_ulong psw_cb; @@ -117,11 +130,12 @@ void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags) m = UINT32_MAX; } - qemu_fprintf(f, "IA_F " TARGET_FMT_lx " IA_B " TARGET_FMT_lx - " IIR %0*" PRIx64 "\n", + qemu_fprintf(f, "IA_F %08" PRIx64 ":%0*" PRIx64 " (" TARGET_FMT_lx ")\n" + "IA_B %08" PRIx64 ":%0*" PRIx64 " (" TARGET_FMT_lx ")\n", + env->iasq_f >> 32, w, m & env->iaoq_f, hppa_form_gva_psw(psw, env->iasq_f, env->iaoq_f), - hppa_form_gva_psw(psw, env->iasq_b, env->iaoq_b), - w, m & env->cr[CR_IIR]); + env->iasq_b >> 32, w, m & env->iaoq_b, + hppa_form_gva_psw(psw, env->iasq_b, env->iaoq_b)); psw_c[0] = (psw & PSW_W ? 'W' : '-'); psw_c[1] = (psw & PSW_E ? 'E' : '-'); @@ -154,12 +168,46 @@ void hppa_cpu_dump_state(CPUState *cs, FILE *f, int flags) (i & 3) == 3 ? '\n' : ' '); } #ifndef CONFIG_USER_ONLY + for (i = 0; i < 32; i++) { + qemu_fprintf(f, "%-4s %0*" PRIx64 "%c", + cr_name[i], w, m & env->cr[i], + (i & 3) == 3 ? '\n' : ' '); + } + qemu_fprintf(f, "ISQB %0*" PRIx64 " IOQB %0*" PRIx64 "\n", + w, m & env->cr_back[0], w, m & env->cr_back[1]); for (i = 0; i < 8; i++) { qemu_fprintf(f, "SR%02d %08x%c", i, (uint32_t)(env->sr[i] >> 32), (i & 3) == 3 ? '\n' : ' '); } #endif - qemu_fprintf(f, "\n"); - /* ??? FR */ + if (flags & CPU_DUMP_FPU) { + static const char rm[4][4] = { "RN", "RZ", "R+", "R-" }; + char flg[6], ena[6]; + uint32_t fpsr = env->fr0_shadow; + + flg[0] = (fpsr & R_FPSR_FLG_V_MASK ? 'V' : '-'); + flg[1] = (fpsr & R_FPSR_FLG_Z_MASK ? 'Z' : '-'); + flg[2] = (fpsr & R_FPSR_FLG_O_MASK ? 'O' : '-'); + flg[3] = (fpsr & R_FPSR_FLG_U_MASK ? 'U' : '-'); + flg[4] = (fpsr & R_FPSR_FLG_I_MASK ? 'I' : '-'); + flg[5] = '\0'; + + ena[0] = (fpsr & R_FPSR_ENA_V_MASK ? 'V' : '-'); + ena[1] = (fpsr & R_FPSR_ENA_Z_MASK ? 'Z' : '-'); + ena[2] = (fpsr & R_FPSR_ENA_O_MASK ? 'O' : '-'); + ena[3] = (fpsr & R_FPSR_ENA_U_MASK ? 'U' : '-'); + ena[4] = (fpsr & R_FPSR_ENA_I_MASK ? 
'I' : '-'); + ena[5] = '\0'; + + qemu_fprintf(f, "FPSR %08x flag %s enable %s %s\n", + fpsr, flg, ena, rm[FIELD_EX32(fpsr, FPSR, RM)]); + + for (i = 0; i < 32; i++) { + qemu_fprintf(f, "FR%02d %016" PRIx64 "%c", + i, env->fr[i], (i & 3) == 3 ? '\n' : ' '); + } + } + + qemu_fprintf(f, "\n"); } diff --git a/target/hppa/helper.h b/target/hppa/helper.h index 5900fd70bc..de411923d9 100644 --- a/target/hppa/helper.h +++ b/target/hppa/helper.h @@ -1,6 +1,4 @@ DEF_HELPER_2(excp, noreturn, env, int) -DEF_HELPER_FLAGS_2(tsv, TCG_CALL_NO_WG, void, env, tl) -DEF_HELPER_FLAGS_2(tcond, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_FLAGS_3(stby_b, TCG_CALL_NO_WG, void, env, tl, tl) DEF_HELPER_FLAGS_3(stby_b_parallel, TCG_CALL_NO_WG, void, env, tl, tl) @@ -88,6 +86,7 @@ DEF_HELPER_1(halt, noreturn, env) DEF_HELPER_1(reset, noreturn, env) DEF_HELPER_1(rfi, void, env) DEF_HELPER_1(rfi_r, void, env) +DEF_HELPER_FLAGS_2(b_gate_priv, TCG_CALL_NO_WG, i64, env, i64) DEF_HELPER_FLAGS_2(write_interval_timer, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_FLAGS_2(write_eirr, TCG_CALL_NO_RWG, void, env, tl) DEF_HELPER_FLAGS_2(swap_system_mask, TCG_CALL_NO_RWG, tl, env, tl) diff --git a/target/hppa/int_helper.c b/target/hppa/int_helper.c index a667ee380d..391f32f27d 100644 --- a/target/hppa/int_helper.c +++ b/target/hppa/int_helper.c @@ -134,13 +134,13 @@ void hppa_cpu_do_interrupt(CPUState *cs) switch (i) { case EXCP_ILL: case EXCP_BREAK: + case EXCP_OVERFLOW: + case EXCP_COND: case EXCP_PRIV_REG: case EXCP_PRIV_OPR: /* IIR set via translate.c. */ break; - case EXCP_OVERFLOW: - case EXCP_COND: case EXCP_ASSIST: case EXCP_DTLB_MISS: case EXCP_NA_ITLB_MISS: @@ -167,7 +167,7 @@ void hppa_cpu_do_interrupt(CPUState *cs) vaddr = hppa_form_gva_psw(old_psw, env->iasq_f, vaddr); t = hppa_get_physical_address(env, vaddr, MMU_KERNEL_IDX, - 0, &paddr, &prot, NULL); + 0, &paddr, &prot); if (t >= 0) { /* We can't re-load the instruction. 
*/ env->cr[CR_IIR] = 0; @@ -241,21 +241,22 @@ void hppa_cpu_do_interrupt(CPUState *cs) [EXCP_SYSCALL_LWS] = "syscall-lws", [EXCP_TOC] = "TOC (transfer of control)", }; - static int count; - const char *name = NULL; - char unknown[16]; - if (i >= 0 && i < ARRAY_SIZE(names)) { - name = names[i]; - } - if (!name) { - snprintf(unknown, sizeof(unknown), "unknown %d", i); - name = unknown; + FILE *logfile = qemu_log_trylock(); + if (logfile) { + const char *name = NULL; + + if (i >= 0 && i < ARRAY_SIZE(names)) { + name = names[i]; + } + if (name) { + fprintf(logfile, "INT: cpu %d %s\n", cs->cpu_index, name); + } else { + fprintf(logfile, "INT: cpu %d unknown %d\n", cs->cpu_index, i); + } + hppa_cpu_dump_state(cs, logfile, 0); + qemu_log_unlock(logfile); } - qemu_log("INT %6d: %s @ " TARGET_FMT_lx ":" TARGET_FMT_lx - " for " TARGET_FMT_lx ":" TARGET_FMT_lx "\n", - ++count, name, env->cr[CR_IIASQ], env->cr[CR_IIAOQ], - env->cr[CR_ISR], env->cr[CR_IOR]); } cs->exception_index = -1; } diff --git a/target/hppa/mem_helper.c b/target/hppa/mem_helper.c index 84785b5a5c..b984f730aa 100644 --- a/target/hppa/mem_helper.c +++ b/target/hppa/mem_helper.c @@ -21,6 +21,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/helper-proto.h" #include "hw/core/cpu.h" #include "trace.h" @@ -196,18 +197,13 @@ static int match_prot_id64(CPUHPPAState *env, uint32_t access_id) } int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx, - int type, hwaddr *pphys, int *pprot, - HPPATLBEntry **tlb_entry) + int type, hwaddr *pphys, int *pprot) { hwaddr phys; int prot, r_prot, w_prot, x_prot, priv; HPPATLBEntry *ent; int ret = -1; - if (tlb_entry) { - *tlb_entry = NULL; - } - /* Virtual translation disabled. Map absolute to physical. */ if (MMU_IDX_MMU_DISABLED(mmu_idx)) { switch (mmu_idx) { @@ -237,10 +233,6 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx, goto egress; } - if (tlb_entry) { - *tlb_entry = ent; - } - /* We now know the physical address. */ phys = ent->pa + (addr - ent->itree.start); @@ -295,30 +287,38 @@ int hppa_get_physical_address(CPUHPPAState *env, vaddr addr, int mmu_idx, goto egress; } - /* In reverse priority order, check for conditions which raise faults. - As we go, remove PROT bits that cover the condition we want to check. - In this way, the resulting PROT will force a re-check of the - architectural TLB entry for the next access. */ - if (unlikely(!ent->d)) { + /* + * In priority order, check for conditions which raise faults. + * Remove PROT bits that cover the condition we want to check, + * so that the resulting PROT will force a re-check of the + * architectural TLB entry for the next access. + */ + if (unlikely(ent->t)) { + prot &= PAGE_EXEC; + if (!(type & PAGE_EXEC)) { + /* The T bit is set -- Page Reference Fault. */ + ret = EXCP_PAGE_REF; + } + } else if (!ent->d) { + prot &= PAGE_READ | PAGE_EXEC; if (type & PAGE_WRITE) { /* The D bit is not set -- TLB Dirty Bit Fault. */ ret = EXCP_TLB_DIRTY; } + } else if (unlikely(ent->b)) { prot &= PAGE_READ | PAGE_EXEC; - } - if (unlikely(ent->b)) { if (type & PAGE_WRITE) { - /* The B bit is set -- Data Memory Break Fault. */ - ret = EXCP_DMB; - } - prot &= PAGE_READ | PAGE_EXEC; - } - if (unlikely(ent->t)) { - if (!(type & PAGE_EXEC)) { - /* The T bit is set -- Page Reference Fault. */ - ret = EXCP_PAGE_REF; + /* + * The B bit is set -- Data Memory Break Fault. + * Except when PSW_X is set, allow this single access to succeed. 
+ * The write bit will be invalidated for subsequent accesses. + */ + if (env->psw_xb & PSW_X) { + prot |= PAGE_WRITE_INV; + } else { + ret = EXCP_DMB; + } } - prot &= PAGE_EXEC; } egress: @@ -341,7 +341,7 @@ hwaddr hppa_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) cpu->env.psw & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX); excp = hppa_get_physical_address(&cpu->env, addr, mmu_idx, 0, - &phys, &prot, NULL); + &phys, &prot); /* Since we're translating for debugging, the only error that is a hard error is no translation at all. Otherwise, while a real cpu @@ -423,7 +423,6 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size, { HPPACPU *cpu = HPPA_CPU(cs); CPUHPPAState *env = &cpu->env; - HPPATLBEntry *ent; int prot, excp, a_prot; hwaddr phys; @@ -439,8 +438,7 @@ bool hppa_cpu_tlb_fill(CPUState *cs, vaddr addr, int size, break; } - excp = hppa_get_physical_address(env, addr, mmu_idx, - a_prot, &phys, &prot, &ent); + excp = hppa_get_physical_address(env, addr, mmu_idx, a_prot, &phys, &prot); if (unlikely(excp >= 0)) { if (probe) { return false; @@ -681,7 +679,7 @@ target_ulong HELPER(lpa)(CPUHPPAState *env, target_ulong addr) int prot, excp; excp = hppa_get_physical_address(env, addr, MMU_KERNEL_IDX, 0, - &phys, &prot, NULL); + &phys, &prot); if (excp >= 0) { if (excp == EXCP_DTLB_MISS) { excp = EXCP_NA_DTLB_MISS; @@ -693,13 +691,6 @@ target_ulong HELPER(lpa)(CPUHPPAState *env, target_ulong addr) return phys; } -/* Return the ar_type of the TLB at VADDR, or -1. */ -int hppa_artype_for_page(CPUHPPAState *env, target_ulong vaddr) -{ - HPPATLBEntry *ent = hppa_find_tlb(env, vaddr); - return ent ? ent->ar_type : -1; -} - /* * diag_btlb() emulates the PDC PDC_BLOCK_TLB firmware call to * allow operating systems to modify the Block TLB (BTLB) entries. @@ -795,3 +786,30 @@ void HELPER(diag_btlb)(CPUHPPAState *env) break; } } + +uint64_t HELPER(b_gate_priv)(CPUHPPAState *env, uint64_t iaoq_f) +{ + uint64_t gva = hppa_form_gva(env, env->iasq_f, iaoq_f); + HPPATLBEntry *ent = hppa_find_tlb(env, gva); + + if (ent == NULL) { + raise_exception_with_ior(env, EXCP_ITLB_MISS, GETPC(), gva, false); + } + + /* + * There should be no need to check page permissions, as that will + * already have been done by tb_lookup via get_page_addr_code. + * All we need at this point is to check the ar_type. + * + * No change for non-gateway pages or for priv decrease. 
+ */ + if (ent->ar_type & 4) { + int old_priv = iaoq_f & 3; + int new_priv = ent->ar_type & 3; + + if (new_priv < old_priv) { + iaoq_f = (iaoq_f & -4) | new_priv; + } + } + return iaoq_f; +} diff --git a/target/hppa/op_helper.c b/target/hppa/op_helper.c index 6cf49f33b7..7f79196fff 100644 --- a/target/hppa/op_helper.c +++ b/target/hppa/op_helper.c @@ -42,20 +42,6 @@ G_NORETURN void hppa_dynamic_excp(CPUHPPAState *env, int excp, uintptr_t ra) cpu_loop_exit_restore(cs, ra); } -void HELPER(tsv)(CPUHPPAState *env, target_ulong cond) -{ - if (unlikely((target_long)cond < 0)) { - hppa_dynamic_excp(env, EXCP_OVERFLOW, GETPC()); - } -} - -void HELPER(tcond)(CPUHPPAState *env, target_ulong cond) -{ - if (unlikely(cond)) { - hppa_dynamic_excp(env, EXCP_COND, GETPC()); - } -} - static void atomic_store_mask32(CPUHPPAState *env, target_ulong addr, uint32_t val, uint32_t mask, uintptr_t ra) { @@ -348,8 +334,7 @@ target_ulong HELPER(probe)(CPUHPPAState *env, target_ulong addr, } mmu_idx = PRIV_P_TO_MMU_IDX(level, env->psw & PSW_P); - excp = hppa_get_physical_address(env, addr, mmu_idx, 0, &phys, - &prot, NULL); + excp = hppa_get_physical_address(env, addr, mmu_idx, 0, &phys, &prot); if (excp >= 0) { cpu_restore_state(env_cpu(env), GETPC()); hppa_set_ior_and_isr(env, addr, MMU_IDX_MMU_DISABLED(mmu_idx)); diff --git a/target/hppa/sys_helper.c b/target/hppa/sys_helper.c index 22d6c89964..9b43b556fd 100644 --- a/target/hppa/sys_helper.c +++ b/target/hppa/sys_helper.c @@ -18,6 +18,7 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" @@ -93,6 +94,17 @@ void HELPER(rfi)(CPUHPPAState *env) env->iaoq_b = env->cr_back[1]; env->iasq_f = (env->cr[CR_IIASQ] << 32) & ~(env->iaoq_f & mask); env->iasq_b = (env->cr_back[0] << 32) & ~(env->iaoq_b & mask); + + if (qemu_loglevel_mask(CPU_LOG_INT)) { + FILE *logfile = qemu_log_trylock(); + if (logfile) { + CPUState *cs = env_cpu(env); + + fprintf(logfile, "RFI: cpu %d\n", cs->cpu_index); + hppa_cpu_dump_state(cs, logfile, 0); + qemu_log_unlock(logfile); + } + } } static void getshadowregs(CPUHPPAState *env) diff --git a/target/hppa/translate.c b/target/hppa/translate.c index 42fa480950..51c1762435 100644 --- a/target/hppa/translate.c +++ b/target/hppa/translate.c @@ -19,9 +19,9 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" #include "exec/helper-proto.h" @@ -41,25 +41,51 @@ typedef struct DisasCond { TCGv_i64 a0, a1; } DisasCond; +typedef struct DisasIAQE { + /* IASQ; may be null for no change from TB. */ + TCGv_i64 space; + /* IAOQ base; may be null for relative address. */ + TCGv_i64 base; + /* IAOQ addend; if base is null, relative to cpu_iaoq_f. */ + int64_t disp; +} DisasIAQE; + +typedef struct DisasDelayException { + struct DisasDelayException *next; + TCGLabel *lab; + uint32_t insn; + bool set_iir; + int8_t set_n; + uint8_t excp; + /* Saved state at parent insn. */ + DisasIAQE iaq_f, iaq_b; +} DisasDelayException; + typedef struct DisasContext { DisasContextBase base; CPUState *cs; - uint64_t iaoq_f; - uint64_t iaoq_b; - uint64_t iaoq_n; - TCGv_i64 iaoq_n_var; + /* IAQ_Front, IAQ_Back. */ + DisasIAQE iaq_f, iaq_b; + /* IAQ_Next, for jumps, otherwise null for simple advance. */ + DisasIAQE iaq_j, *iaq_n; + + /* IAOQ_Front at entry to TB. 
*/ + uint64_t iaoq_first; DisasCond null_cond; TCGLabel *null_lab; + DisasDelayException *delay_excp_list; TCGv_i64 zero; uint32_t insn; uint32_t tb_flags; int mmu_idx; int privilege; + uint32_t psw_xb; bool psw_n_nonzero; + bool psw_b_next; bool is_pa20; bool insn_start_updated; @@ -238,6 +264,7 @@ static TCGv_i64 cpu_psw_n; static TCGv_i64 cpu_psw_v; static TCGv_i64 cpu_psw_cb; static TCGv_i64 cpu_psw_cb_msb; +static TCGv_i32 cpu_psw_xb; void hppa_translate_init(void) { @@ -290,6 +317,9 @@ void hppa_translate_init(void) *v->var = tcg_global_mem_new(tcg_env, v->ofs, v->name); } + cpu_psw_xb = tcg_global_mem_new_i32(tcg_env, + offsetof(CPUHPPAState, psw_xb), + "psw_xb"); cpu_iasq_f = tcg_global_mem_new_i64(tcg_env, offsetof(CPUHPPAState, iasq_f), "iasq_f"); @@ -332,47 +362,32 @@ static DisasCond cond_make_n(void) }; } -static DisasCond cond_make_tmp(TCGCond c, TCGv_i64 a0, TCGv_i64 a1) +static DisasCond cond_make_tt(TCGCond c, TCGv_i64 a0, TCGv_i64 a1) { assert (c != TCG_COND_NEVER && c != TCG_COND_ALWAYS); return (DisasCond){ .c = c, .a0 = a0, .a1 = a1 }; } -static DisasCond cond_make_0_tmp(TCGCond c, TCGv_i64 a0) +static DisasCond cond_make_ti(TCGCond c, TCGv_i64 a0, uint64_t imm) { - return cond_make_tmp(c, a0, tcg_constant_i64(0)); + return cond_make_tt(c, a0, tcg_constant_i64(imm)); } -static DisasCond cond_make_0(TCGCond c, TCGv_i64 a0) +static DisasCond cond_make_vi(TCGCond c, TCGv_i64 a0, uint64_t imm) { TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_mov_i64(tmp, a0); - return cond_make_0_tmp(c, tmp); + return cond_make_ti(c, tmp, imm); } -static DisasCond cond_make(TCGCond c, TCGv_i64 a0, TCGv_i64 a1) +static DisasCond cond_make_vv(TCGCond c, TCGv_i64 a0, TCGv_i64 a1) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); tcg_gen_mov_i64(t0, a0); tcg_gen_mov_i64(t1, a1); - return cond_make_tmp(c, t0, t1); -} - -static void cond_free(DisasCond *cond) -{ - switch (cond->c) { - default: - cond->a0 = NULL; - cond->a1 = NULL; - /* fallthru */ - case TCG_COND_ALWAYS: - cond->c = TCG_COND_NEVER; - break; - case TCG_COND_NEVER: - break; - } + return cond_make_tt(c, t0, t1); } static TCGv_i64 load_gpr(DisasContext *ctx, unsigned reg) @@ -499,6 +514,25 @@ static void load_spr(DisasContext *ctx, TCGv_i64 dest, unsigned reg) #endif } +/* + * Write a value to psw_xb, bearing in mind the known value. + * To be used just before exiting the TB, so do not update the known value. + */ +static void store_psw_xb(DisasContext *ctx, uint32_t xb) +{ + tcg_debug_assert(xb == 0 || xb == PSW_B); + if (ctx->psw_xb != xb) { + tcg_gen_movi_i32(cpu_psw_xb, xb); + } +} + +/* Write a value to psw_xb, and update the known value. */ +static void set_psw_xb(DisasContext *ctx, uint32_t xb) +{ + store_psw_xb(ctx, xb); + ctx->psw_xb = xb; +} + /* Skip over the implementation of an insn that has been nullified. Use this when the insn is too complex for a conditional move. */ static void nullify_over(DisasContext *ctx) @@ -524,7 +558,7 @@ static void nullify_over(DisasContext *ctx) tcg_gen_brcond_i64(ctx->null_cond.c, ctx->null_cond.a0, ctx->null_cond.a1, ctx->null_lab); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); } } @@ -542,7 +576,7 @@ static void nullify_save(DisasContext *ctx) ctx->null_cond.a0, ctx->null_cond.a1); ctx->psw_n_nonzero = true; } - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); } /* Set a PSW[N] to X. 
The intention is that this is used immediately @@ -566,6 +600,8 @@ static bool nullify_end(DisasContext *ctx) /* For NEXT, NORETURN, STALE, we can easily continue (or exit). For UPDATED, we cannot update on the nullified path. */ assert(status != DISAS_IAQ_N_UPDATED); + /* Taken branches are handled manually. */ + assert(!ctx->psw_b_next); if (likely(null_lab == NULL)) { /* The current insn wasn't conditional or handled the condition @@ -594,31 +630,94 @@ static bool nullify_end(DisasContext *ctx) return true; } +static bool iaqe_variable(const DisasIAQE *e) +{ + return e->base || e->space; +} + +static DisasIAQE iaqe_incr(const DisasIAQE *e, int64_t disp) +{ + return (DisasIAQE){ + .space = e->space, + .base = e->base, + .disp = e->disp + disp, + }; +} + +static DisasIAQE iaqe_branchi(DisasContext *ctx, int64_t disp) +{ + return (DisasIAQE){ + .space = ctx->iaq_b.space, + .disp = ctx->iaq_f.disp + 8 + disp, + }; +} + +static DisasIAQE iaqe_next_absv(DisasContext *ctx, TCGv_i64 var) +{ + return (DisasIAQE){ + .space = ctx->iaq_b.space, + .base = var, + }; +} + static void copy_iaoq_entry(DisasContext *ctx, TCGv_i64 dest, - uint64_t ival, TCGv_i64 vval) + const DisasIAQE *src) { - uint64_t mask = gva_offset_mask(ctx->tb_flags); + tcg_gen_addi_i64(dest, src->base ? : cpu_iaoq_f, src->disp); +} - if (ival != -1) { - tcg_gen_movi_i64(dest, ival & mask); - return; +static void install_iaq_entries(DisasContext *ctx, const DisasIAQE *f, + const DisasIAQE *b) +{ + DisasIAQE b_next; + + if (b == NULL) { + b_next = iaqe_incr(f, 4); + b = &b_next; } - tcg_debug_assert(vval != NULL); /* - * We know that the IAOQ is already properly masked. - * This optimization is primarily for "iaoq_f = iaoq_b". + * There is an edge case + * bv r0(rN) + * b,l disp,r0 + * for which F will use cpu_iaoq_b (from the indirect branch), + * and B will use cpu_iaoq_f (from the direct branch). + * In this case we need an extra temporary. */ - if (vval == cpu_iaoq_f || vval == cpu_iaoq_b) { - tcg_gen_mov_i64(dest, vval); + if (f->base != cpu_iaoq_b) { + copy_iaoq_entry(ctx, cpu_iaoq_b, b); + copy_iaoq_entry(ctx, cpu_iaoq_f, f); + } else if (f->base == b->base) { + copy_iaoq_entry(ctx, cpu_iaoq_f, f); + tcg_gen_addi_i64(cpu_iaoq_b, cpu_iaoq_f, b->disp - f->disp); } else { - tcg_gen_andi_i64(dest, vval, mask); + TCGv_i64 tmp = tcg_temp_new_i64(); + copy_iaoq_entry(ctx, tmp, b); + copy_iaoq_entry(ctx, cpu_iaoq_f, f); + tcg_gen_mov_i64(cpu_iaoq_b, tmp); + } + + if (f->space) { + tcg_gen_mov_i64(cpu_iasq_f, f->space); + } + if (b->space || f->space) { + tcg_gen_mov_i64(cpu_iasq_b, b->space ? 
: f->space); } } -static inline uint64_t iaoq_dest(DisasContext *ctx, int64_t disp) +static void install_link(DisasContext *ctx, unsigned link, bool with_sr0) { - return ctx->iaoq_f + disp + 8; + tcg_debug_assert(ctx->null_cond.c == TCG_COND_NEVER); + if (!link) { + return; + } + DisasIAQE next = iaqe_incr(&ctx->iaq_b, 4); + copy_iaoq_entry(ctx, cpu_gr[link], &next); +#ifndef CONFIG_USER_ONLY + if (with_sr0) { + tcg_gen_mov_i64(cpu_sr[0], cpu_iasq_b); + } +#endif } static void gen_excp_1(int exception) @@ -628,20 +727,44 @@ static void gen_excp_1(int exception) static void gen_excp(DisasContext *ctx, int exception) { - copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_f, cpu_iaoq_f); - copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_b, cpu_iaoq_b); + install_iaq_entries(ctx, &ctx->iaq_f, &ctx->iaq_b); nullify_save(ctx); gen_excp_1(exception); ctx->base.is_jmp = DISAS_NORETURN; } +static DisasDelayException *delay_excp(DisasContext *ctx, uint8_t excp) +{ + DisasDelayException *e = tcg_malloc(sizeof(DisasDelayException)); + + memset(e, 0, sizeof(*e)); + e->next = ctx->delay_excp_list; + ctx->delay_excp_list = e; + + e->lab = gen_new_label(); + e->insn = ctx->insn; + e->set_iir = true; + e->set_n = ctx->psw_n_nonzero ? 0 : -1; + e->excp = excp; + e->iaq_f = ctx->iaq_f; + e->iaq_b = ctx->iaq_b; + + return e; +} + static bool gen_excp_iir(DisasContext *ctx, int exc) { - nullify_over(ctx); - tcg_gen_st_i64(tcg_constant_i64(ctx->insn), - tcg_env, offsetof(CPUHPPAState, cr[CR_IIR])); - gen_excp(ctx, exc); - return nullify_end(ctx); + if (ctx->null_cond.c == TCG_COND_NEVER) { + tcg_gen_st_i64(tcg_constant_i64(ctx->insn), + tcg_env, offsetof(CPUHPPAState, cr[CR_IIR])); + gen_excp(ctx, exc); + } else { + DisasDelayException *e = delay_excp(ctx, exc); + tcg_gen_brcond_i64(tcg_invert_cond(ctx->null_cond.c), + ctx->null_cond.a0, ctx->null_cond.a1, e->lab); + ctx->null_cond = cond_make_f(); + } + return true; } static bool gen_illegal(DisasContext *ctx) @@ -661,9 +784,12 @@ static bool gen_illegal(DisasContext *ctx) } while (0) #endif -static bool use_goto_tb(DisasContext *ctx, uint64_t dest) +static bool use_goto_tb(DisasContext *ctx, const DisasIAQE *f, + const DisasIAQE *b) { - return translator_use_goto_tb(&ctx->base, dest); + return (!iaqe_variable(f) && + (b == NULL || !iaqe_variable(b)) && + translator_use_goto_tb(&ctx->base, ctx->iaoq_first + f->disp)); } /* If the next insn is to be nullified, and it's on the same page, @@ -672,21 +798,20 @@ static bool use_goto_tb(DisasContext *ctx, uint64_t dest) executing a TB that merely branches to the next TB. 
*/ static bool use_nullify_skip(DisasContext *ctx) { - return (((ctx->iaoq_b ^ ctx->iaoq_f) & TARGET_PAGE_MASK) == 0 - && !cpu_breakpoint_test(ctx->cs, ctx->iaoq_b, BP_ANY)); + return (!(tb_cflags(ctx->base.tb) & CF_BP_PAGE) + && !iaqe_variable(&ctx->iaq_b) + && (((ctx->iaoq_first + ctx->iaq_b.disp) ^ ctx->iaoq_first) + & TARGET_PAGE_MASK) == 0); } static void gen_goto_tb(DisasContext *ctx, int which, - uint64_t f, uint64_t b) + const DisasIAQE *f, const DisasIAQE *b) { - if (f != -1 && b != -1 && use_goto_tb(ctx, f)) { + install_iaq_entries(ctx, f, b); + if (use_goto_tb(ctx, f, b)) { tcg_gen_goto_tb(which); - copy_iaoq_entry(ctx, cpu_iaoq_f, f, NULL); - copy_iaoq_entry(ctx, cpu_iaoq_b, b, NULL); tcg_gen_exit_tb(ctx->base.tb, which); } else { - copy_iaoq_entry(ctx, cpu_iaoq_f, f, cpu_iaoq_b); - copy_iaoq_entry(ctx, cpu_iaoq_b, b, ctx->iaoq_n_var); tcg_gen_lookup_and_goto_ptr(); } } @@ -709,28 +834,36 @@ static bool cond_need_cb(int c) static DisasCond do_cond(DisasContext *ctx, unsigned cf, bool d, TCGv_i64 res, TCGv_i64 uv, TCGv_i64 sv) { + TCGCond sign_cond, zero_cond; + uint64_t sign_imm, zero_imm; DisasCond cond; TCGv_i64 tmp; + if (d) { + /* 64-bit condition. */ + sign_imm = 0; + sign_cond = TCG_COND_LT; + zero_imm = 0; + zero_cond = TCG_COND_EQ; + } else { + /* 32-bit condition. */ + sign_imm = 1ull << 31; + sign_cond = TCG_COND_TSTNE; + zero_imm = UINT32_MAX; + zero_cond = TCG_COND_TSTEQ; + } + switch (cf >> 1) { case 0: /* Never / TR (0 / 1) */ cond = cond_make_f(); break; case 1: /* = / <> (Z / !Z) */ - if (!d) { - tmp = tcg_temp_new_i64(); - tcg_gen_ext32u_i64(tmp, res); - res = tmp; - } - cond = cond_make_0(TCG_COND_EQ, res); + cond = cond_make_vi(zero_cond, res, zero_imm); break; case 2: /* < / >= (N ^ V / !(N ^ V) */ tmp = tcg_temp_new_i64(); tcg_gen_xor_i64(tmp, res, sv); - if (!d) { - tcg_gen_ext32s_i64(tmp, tmp); - } - cond = cond_make_0_tmp(TCG_COND_LT, tmp); + cond = cond_make_ti(sign_cond, tmp, sign_imm); break; case 3: /* <= / > (N ^ V) | Z / !((N ^ V) | Z) */ /* @@ -738,45 +871,29 @@ static DisasCond do_cond(DisasContext *ctx, unsigned cf, bool d, * (N ^ V) | Z * ((res < 0) ^ (sv < 0)) | !res * ((res ^ sv) < 0) | !res - * (~(res ^ sv) >= 0) | !res - * !(~(res ^ sv) >> 31) | !res - * !(~(res ^ sv) >> 31 & res) + * ((res ^ sv) < 0 ? 1 : !res) + * !((res ^ sv) < 0 ? 
0 : res) */ tmp = tcg_temp_new_i64(); - tcg_gen_eqv_i64(tmp, res, sv); - if (!d) { - tcg_gen_sextract_i64(tmp, tmp, 31, 1); - tcg_gen_and_i64(tmp, tmp, res); - tcg_gen_ext32u_i64(tmp, tmp); - } else { - tcg_gen_sari_i64(tmp, tmp, 63); - tcg_gen_and_i64(tmp, tmp, res); - } - cond = cond_make_0_tmp(TCG_COND_EQ, tmp); + tcg_gen_xor_i64(tmp, res, sv); + tcg_gen_movcond_i64(sign_cond, tmp, + tmp, tcg_constant_i64(sign_imm), + ctx->zero, res); + cond = cond_make_ti(zero_cond, tmp, zero_imm); break; case 4: /* NUV / UV (!UV / UV) */ - cond = cond_make_0(TCG_COND_EQ, uv); + cond = cond_make_vi(TCG_COND_EQ, uv, 0); break; case 5: /* ZNV / VNZ (!UV | Z / UV & !Z) */ tmp = tcg_temp_new_i64(); tcg_gen_movcond_i64(TCG_COND_EQ, tmp, uv, ctx->zero, ctx->zero, res); - if (!d) { - tcg_gen_ext32u_i64(tmp, tmp); - } - cond = cond_make_0_tmp(TCG_COND_EQ, tmp); + cond = cond_make_ti(zero_cond, tmp, zero_imm); break; case 6: /* SV / NSV (V / !V) */ - if (!d) { - tmp = tcg_temp_new_i64(); - tcg_gen_ext32s_i64(tmp, sv); - sv = tmp; - } - cond = cond_make_0(TCG_COND_LT, sv); + cond = cond_make_vi(sign_cond, sv, sign_imm); break; case 7: /* OD / EV */ - tmp = tcg_temp_new_i64(); - tcg_gen_andi_i64(tmp, res, 1); - cond = cond_make_0_tmp(TCG_COND_NE, tmp); + cond = cond_make_vi(TCG_COND_TSTNE, res, 1); break; default: g_assert_not_reached(); @@ -838,9 +955,9 @@ static DisasCond do_sub_cond(DisasContext *ctx, unsigned cf, bool d, tcg_gen_ext32s_i64(t1, in1); tcg_gen_ext32s_i64(t2, in2); } - return cond_make_tmp(tc, t1, t2); + return cond_make_tt(tc, t1, t2); } - return cond_make(tc, in1, in2); + return cond_make_vv(tc, in1, in2); } /* @@ -856,65 +973,41 @@ static DisasCond do_log_cond(DisasContext *ctx, unsigned cf, bool d, TCGv_i64 res) { TCGCond tc; - bool ext_uns; + uint64_t imm; - switch (cf) { - case 0: /* never */ - case 9: /* undef, C */ - case 11: /* undef, C & !Z */ - case 12: /* undef, V */ - return cond_make_f(); - - case 1: /* true */ - case 8: /* undef, !C */ - case 10: /* undef, !C | Z */ - case 13: /* undef, !V */ - return cond_make_t(); - - case 2: /* == */ - tc = TCG_COND_EQ; - ext_uns = true; - break; - case 3: /* <> */ - tc = TCG_COND_NE; - ext_uns = true; - break; - case 4: /* < */ - tc = TCG_COND_LT; - ext_uns = false; - break; - case 5: /* >= */ - tc = TCG_COND_GE; - ext_uns = false; + switch (cf >> 1) { + case 0: /* never / always */ + case 4: /* undef, C */ + case 5: /* undef, C & !Z */ + case 6: /* undef, V */ + return cf & 1 ? cond_make_t() : cond_make_f(); + case 1: /* == / <> */ + tc = d ? TCG_COND_EQ : TCG_COND_TSTEQ; + imm = d ? 0 : UINT32_MAX; break; - case 6: /* <= */ - tc = TCG_COND_LE; - ext_uns = false; + case 2: /* < / >= */ + tc = d ? TCG_COND_LT : TCG_COND_TSTNE; + imm = d ? 0 : 1ull << 31; break; - case 7: /* > */ - tc = TCG_COND_GT; - ext_uns = false; + case 3: /* <= / > */ + tc = cf & 1 ? 
TCG_COND_GT : TCG_COND_LE; + if (!d) { + TCGv_i64 tmp = tcg_temp_new_i64(); + tcg_gen_ext32s_i64(tmp, res); + return cond_make_ti(tc, tmp, 0); + } + return cond_make_vi(tc, res, 0); + case 7: /* OD / EV */ + tc = TCG_COND_TSTNE; + imm = 1; break; - - case 14: /* OD */ - case 15: /* EV */ - return do_cond(ctx, cf, d, res, NULL, NULL); - default: g_assert_not_reached(); } - - if (!d) { - TCGv_i64 tmp = tcg_temp_new_i64(); - - if (ext_uns) { - tcg_gen_ext32u_i64(tmp, res); - } else { - tcg_gen_ext32s_i64(tmp, res); - } - return cond_make_0_tmp(tc, tmp); + if (cf & 1) { + tc = tcg_invert_cond(tc); } - return cond_make_0(tc, res); + return cond_make_vi(tc, res, imm); } /* Similar, but for shift/extract/deposit conditions. */ @@ -971,9 +1064,8 @@ static DisasCond do_unit_zero_cond(unsigned cf, bool d, TCGv_i64 res) tmp = tcg_temp_new_i64(); tcg_gen_subi_i64(tmp, res, ones); tcg_gen_andc_i64(tmp, tmp, res); - tcg_gen_andi_i64(tmp, tmp, sgns); - return cond_make_0_tmp(cf & 1 ? TCG_COND_EQ : TCG_COND_NE, tmp); + return cond_make_ti(cf & 1 ? TCG_COND_TSTEQ : TCG_COND_TSTNE, tmp, sgns); } static TCGv_i64 get_carry(DisasContext *ctx, bool d, @@ -1061,6 +1153,36 @@ static TCGv_i64 do_sub_sv(DisasContext *ctx, TCGv_i64 res, return sv; } +static void gen_tc(DisasContext *ctx, DisasCond *cond) +{ + DisasDelayException *e; + + switch (cond->c) { + case TCG_COND_NEVER: + break; + case TCG_COND_ALWAYS: + gen_excp_iir(ctx, EXCP_COND); + break; + default: + e = delay_excp(ctx, EXCP_COND); + tcg_gen_brcond_i64(cond->c, cond->a0, cond->a1, e->lab); + /* In the non-trap path, the condition is known false. */ + *cond = cond_make_f(); + break; + } +} + +static void gen_tsv(DisasContext *ctx, TCGv_i64 *sv, bool d) +{ + DisasCond cond = do_cond(ctx, /* SV */ 12, d, NULL, NULL, *sv); + DisasDelayException *e = delay_excp(ctx, EXCP_OVERFLOW); + + tcg_gen_brcond_i64(cond.c, cond.a0, cond.a1, e->lab); + + /* In the non-trap path, V is known zero. */ + *sv = tcg_constant_i64(0); +} + static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, TCGv_i64 in2, unsigned shift, bool is_l, bool is_tsv, bool is_tc, bool is_c, unsigned cf, bool d) @@ -1103,10 +1225,7 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, if (is_tsv || cond_need_sv(c)) { sv = do_add_sv(ctx, dest, in1, in2, orig_in1, shift, d); if (is_tsv) { - if (!d) { - tcg_gen_ext32s_i64(sv, sv); - } - gen_helper_tsv(tcg_env, sv); + gen_tsv(ctx, &sv, d); } } @@ -1119,9 +1238,7 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, /* Emit any conditional trap before any writeback. */ cond = do_cond(ctx, cf, d, dest, uv, sv); if (is_tc) { - tmp = tcg_temp_new_i64(); - tcg_gen_setcond_i64(cond.c, tmp, cond.a0, cond.a1); - gen_helper_tcond(tcg_env, tmp); + gen_tc(ctx, &cond); } /* Write back the result. */ @@ -1132,7 +1249,6 @@ static void do_add(DisasContext *ctx, unsigned rt, TCGv_i64 orig_in1, save_gpr(ctx, rt, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); ctx->null_cond = cond; } @@ -1141,6 +1257,10 @@ static bool do_add_reg(DisasContext *ctx, arg_rrr_cf_d_sh *a, { TCGv_i64 tcg_r1, tcg_r2; + if (unlikely(is_tc && a->cf == 1)) { + /* Unconditional trap on condition. */ + return gen_excp_iir(ctx, EXCP_COND); + } if (a->cf) { nullify_over(ctx); } @@ -1156,6 +1276,10 @@ static bool do_add_imm(DisasContext *ctx, arg_rri_cf *a, { TCGv_i64 tcg_im, tcg_r2; + if (unlikely(is_tc && a->cf == 1)) { + /* Unconditional trap on condition. 
*/ + return gen_excp_iir(ctx, EXCP_COND); + } if (a->cf) { nullify_over(ctx); } @@ -1170,7 +1294,7 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, TCGv_i64 in2, bool is_tsv, bool is_b, bool is_tc, unsigned cf, bool d) { - TCGv_i64 dest, sv, cb, cb_msb, tmp; + TCGv_i64 dest, sv, cb, cb_msb; unsigned c = cf >> 1; DisasCond cond; @@ -1202,10 +1326,7 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, if (is_tsv || cond_need_sv(c)) { sv = do_sub_sv(ctx, dest, in1, in2); if (is_tsv) { - if (!d) { - tcg_gen_ext32s_i64(sv, sv); - } - gen_helper_tsv(tcg_env, sv); + gen_tsv(ctx, &sv, d); } } @@ -1218,9 +1339,7 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, /* Emit any conditional trap before any writeback. */ if (is_tc) { - tmp = tcg_temp_new_i64(); - tcg_gen_setcond_i64(cond.c, tmp, cond.a0, cond.a1); - gen_helper_tcond(tcg_env, tmp); + gen_tc(ctx, &cond); } /* Write back the result. */ @@ -1229,7 +1348,6 @@ static void do_sub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, save_gpr(ctx, rt, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); ctx->null_cond = cond; } @@ -1284,7 +1402,6 @@ static void do_cmpclr(DisasContext *ctx, unsigned rt, TCGv_i64 in1, save_gpr(ctx, rt, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); ctx->null_cond = cond; } @@ -1299,10 +1416,7 @@ static void do_log(DisasContext *ctx, unsigned rt, TCGv_i64 in1, save_gpr(ctx, rt, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (cf) { - ctx->null_cond = do_log_cond(ctx, cf, d, dest); - } + ctx->null_cond = do_log_cond(ctx, cf, d, dest); } static bool do_log_reg(DisasContext *ctx, arg_rrr_cf_d *a, @@ -1386,18 +1500,15 @@ static void do_unit_addsub(DisasContext *ctx, unsigned rt, TCGv_i64 in1, tcg_gen_shri_i64(cb, cb, 1); } - tcg_gen_andi_i64(cb, cb, test_cb); - cond = cond_make_0_tmp(cf & 1 ? TCG_COND_EQ : TCG_COND_NE, cb); + cond = cond_make_ti(cf & 1 ? TCG_COND_TSTEQ : TCG_COND_TSTNE, + cb, test_cb); } if (is_tc) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_setcond_i64(cond.c, tmp, cond.a0, cond.a1); - gen_helper_tcond(tcg_env, tmp); + gen_tc(ctx, &cond); } save_gpr(ctx, rt, dest); - cond_free(&ctx->null_cond); ctx->null_cond = cond; } @@ -1764,36 +1875,43 @@ static bool do_fop_dedd(DisasContext *ctx, unsigned rt, /* Emit an unconditional branch to a direct target, which may or may not have already had nullification handled. 
*/ -static bool do_dbranch(DisasContext *ctx, uint64_t dest, +static bool do_dbranch(DisasContext *ctx, int64_t disp, unsigned link, bool is_n) { + ctx->iaq_j = iaqe_branchi(ctx, disp); + if (ctx->null_cond.c == TCG_COND_NEVER && ctx->null_lab == NULL) { - if (link != 0) { - copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var); - } - ctx->iaoq_n = dest; + install_link(ctx, link, false); if (is_n) { + if (use_nullify_skip(ctx)) { + nullify_set(ctx, 0); + store_psw_xb(ctx, 0); + gen_goto_tb(ctx, 0, &ctx->iaq_j, NULL); + ctx->base.is_jmp = DISAS_NORETURN; + return true; + } ctx->null_cond.c = TCG_COND_ALWAYS; } + ctx->iaq_n = &ctx->iaq_j; + ctx->psw_b_next = true; } else { nullify_over(ctx); - if (link != 0) { - copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var); - } - + install_link(ctx, link, false); if (is_n && use_nullify_skip(ctx)) { nullify_set(ctx, 0); - gen_goto_tb(ctx, 0, dest, dest + 4); + store_psw_xb(ctx, 0); + gen_goto_tb(ctx, 0, &ctx->iaq_j, NULL); } else { nullify_set(ctx, is_n); - gen_goto_tb(ctx, 0, ctx->iaoq_b, dest); + store_psw_xb(ctx, PSW_B); + gen_goto_tb(ctx, 0, &ctx->iaq_b, &ctx->iaq_j); } - nullify_end(ctx); nullify_set(ctx, 0); - gen_goto_tb(ctx, 1, ctx->iaoq_b, ctx->iaoq_n); + store_psw_xb(ctx, 0); + gen_goto_tb(ctx, 1, &ctx->iaq_b, NULL); ctx->base.is_jmp = DISAS_NORETURN; } return true; @@ -1804,7 +1922,7 @@ static bool do_dbranch(DisasContext *ctx, uint64_t dest, static bool do_cbranch(DisasContext *ctx, int64_t disp, bool is_n, DisasCond *cond) { - uint64_t dest = iaoq_dest(ctx, disp); + DisasIAQE next; TCGLabel *taken = NULL; TCGCond c = cond->c; bool n; @@ -1813,45 +1931,43 @@ static bool do_cbranch(DisasContext *ctx, int64_t disp, bool is_n, /* Handle TRUE and NEVER as direct branches. */ if (c == TCG_COND_ALWAYS) { - return do_dbranch(ctx, dest, 0, is_n && disp >= 0); - } - if (c == TCG_COND_NEVER) { - return do_dbranch(ctx, ctx->iaoq_n, 0, is_n && disp < 0); + return do_dbranch(ctx, disp, 0, is_n && disp >= 0); } taken = gen_new_label(); tcg_gen_brcond_i64(c, cond->a0, cond->a1, taken); - cond_free(cond); /* Not taken: Condition not satisfied; nullify on backward branches. */ n = is_n && disp < 0; if (n && use_nullify_skip(ctx)) { nullify_set(ctx, 0); - gen_goto_tb(ctx, 0, ctx->iaoq_n, ctx->iaoq_n + 4); + store_psw_xb(ctx, 0); + next = iaqe_incr(&ctx->iaq_b, 4); + gen_goto_tb(ctx, 0, &next, NULL); } else { if (!n && ctx->null_lab) { gen_set_label(ctx->null_lab); ctx->null_lab = NULL; } nullify_set(ctx, n); - if (ctx->iaoq_n == -1) { - /* The temporary iaoq_n_var died at the branch above. - Regenerate it here instead of saving it. */ - tcg_gen_addi_i64(ctx->iaoq_n_var, cpu_iaoq_b, 4); - } - gen_goto_tb(ctx, 0, ctx->iaoq_b, ctx->iaoq_n); + store_psw_xb(ctx, 0); + gen_goto_tb(ctx, 0, &ctx->iaq_b, NULL); } gen_set_label(taken); /* Taken: Condition satisfied; nullify on forward branches. */ n = is_n && disp >= 0; + + next = iaqe_branchi(ctx, disp); if (n && use_nullify_skip(ctx)) { nullify_set(ctx, 0); - gen_goto_tb(ctx, 1, dest, dest + 4); + store_psw_xb(ctx, 0); + gen_goto_tb(ctx, 1, &next, NULL); } else { nullify_set(ctx, n); - gen_goto_tb(ctx, 1, ctx->iaoq_b, dest); + store_psw_xb(ctx, PSW_B); + gen_goto_tb(ctx, 1, &ctx->iaq_b, &next); } /* Not taken: the branch itself was nullified. */ @@ -1865,89 +1981,45 @@ static bool do_cbranch(DisasContext *ctx, int64_t disp, bool is_n, return true; } -/* Emit an unconditional branch to an indirect target. This handles - nullification of the branch itself. 
*/ -static bool do_ibranch(DisasContext *ctx, TCGv_i64 dest, - unsigned link, bool is_n) +/* + * Emit an unconditional branch to an indirect target, in ctx->iaq_j. + * This handles nullification of the branch itself. + */ +static bool do_ibranch(DisasContext *ctx, unsigned link, + bool with_sr0, bool is_n) { - TCGv_i64 a0, a1, next, tmp; - TCGCond c; - - assert(ctx->null_lab == NULL); - - if (ctx->null_cond.c == TCG_COND_NEVER) { - if (link != 0) { - copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var); - } - next = tcg_temp_new_i64(); - tcg_gen_mov_i64(next, dest); + if (ctx->null_cond.c == TCG_COND_NEVER && ctx->null_lab == NULL) { + install_link(ctx, link, with_sr0); if (is_n) { if (use_nullify_skip(ctx)) { - copy_iaoq_entry(ctx, cpu_iaoq_f, -1, next); - tcg_gen_addi_i64(next, next, 4); - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, next); + install_iaq_entries(ctx, &ctx->iaq_j, NULL); nullify_set(ctx, 0); ctx->base.is_jmp = DISAS_IAQ_N_UPDATED; return true; } ctx->null_cond.c = TCG_COND_ALWAYS; } - ctx->iaoq_n = -1; - ctx->iaoq_n_var = next; - } else if (is_n && use_nullify_skip(ctx)) { - /* The (conditional) branch, B, nullifies the next insn, N, - and we're allowed to skip execution N (no single-step or - tracepoint in effect). Since the goto_ptr that we must use - for the indirect branch consumes no special resources, we - can (conditionally) skip B and continue execution. */ - /* The use_nullify_skip test implies we have a known control path. */ - tcg_debug_assert(ctx->iaoq_b != -1); - tcg_debug_assert(ctx->iaoq_n != -1); - - /* We do have to handle the non-local temporary, DEST, before - branching. Since IOAQ_F is not really live at this point, we - can simply store DEST optimistically. Similarly with IAOQ_B. */ - copy_iaoq_entry(ctx, cpu_iaoq_f, -1, dest); - next = tcg_temp_new_i64(); - tcg_gen_addi_i64(next, dest, 4); - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, next); - - nullify_over(ctx); - if (link != 0) { - copy_iaoq_entry(ctx, cpu_gr[link], ctx->iaoq_n, ctx->iaoq_n_var); - } - tcg_gen_lookup_and_goto_ptr(); - return nullify_end(ctx); - } else { - c = ctx->null_cond.c; - a0 = ctx->null_cond.a0; - a1 = ctx->null_cond.a1; - - tmp = tcg_temp_new_i64(); - next = tcg_temp_new_i64(); - - copy_iaoq_entry(ctx, tmp, ctx->iaoq_n, ctx->iaoq_n_var); - tcg_gen_movcond_i64(c, next, a0, a1, tmp, dest); - ctx->iaoq_n = -1; - ctx->iaoq_n_var = next; + ctx->iaq_n = &ctx->iaq_j; + ctx->psw_b_next = true; + return true; + } - if (link != 0) { - tcg_gen_movcond_i64(c, cpu_gr[link], a0, a1, cpu_gr[link], tmp); - } + nullify_over(ctx); - if (is_n) { - /* The branch nullifies the next insn, which means the state of N - after the branch is the inverse of the state of N that applied - to the branch. 
*/ - tcg_gen_setcond_i64(tcg_invert_cond(c), cpu_psw_n, a0, a1); - cond_free(&ctx->null_cond); - ctx->null_cond = cond_make_n(); - ctx->psw_n_nonzero = true; - } else { - cond_free(&ctx->null_cond); - } + install_link(ctx, link, with_sr0); + if (is_n && use_nullify_skip(ctx)) { + install_iaq_entries(ctx, &ctx->iaq_j, NULL); + nullify_set(ctx, 0); + store_psw_xb(ctx, 0); + } else { + install_iaq_entries(ctx, &ctx->iaq_b, &ctx->iaq_j); + nullify_set(ctx, is_n); + store_psw_xb(ctx, PSW_B); } - return true; + + tcg_gen_lookup_and_goto_ptr(); + ctx->base.is_jmp = DISAS_NORETURN; + return nullify_end(ctx); } /* Implement @@ -1959,21 +2031,20 @@ static bool do_ibranch(DisasContext *ctx, TCGv_i64 dest, */ static TCGv_i64 do_ibranch_priv(DisasContext *ctx, TCGv_i64 offset) { - TCGv_i64 dest; + TCGv_i64 dest = tcg_temp_new_i64(); switch (ctx->privilege) { case 0: /* Privilege 0 is maximum and is allowed to decrease. */ - return offset; + tcg_gen_mov_i64(dest, offset); + break; case 3: /* Privilege 3 is minimum and is never allowed to increase. */ - dest = tcg_temp_new_i64(); tcg_gen_ori_i64(dest, offset, 3); break; default: - dest = tcg_temp_new_i64(); tcg_gen_andi_i64(dest, offset, -4); tcg_gen_ori_i64(dest, dest, ctx->privilege); - tcg_gen_movcond_i64(TCG_COND_GTU, dest, dest, offset, dest, offset); + tcg_gen_umax_i64(dest, dest, offset); break; } return dest; @@ -1989,7 +2060,7 @@ static TCGv_i64 do_ibranch_priv(DisasContext *ctx, TCGv_i64 offset) aforementioned BE. */ static void do_page_zero(DisasContext *ctx) { - TCGv_i64 tmp; + assert(ctx->iaq_f.disp == 0); /* If by some means we get here with PSW[N]=1, that implies that the B,GATE instruction would be skipped, and we'd fault on the @@ -2006,15 +2077,12 @@ static void do_page_zero(DisasContext *ctx) g_assert_not_reached(); } - /* Check that we didn't arrive here via some means that allowed - non-sequential instruction execution. Normally the PSW[B] bit - detects this by disallowing the B,GATE instruction to execute - under such conditions. */ - if (ctx->iaoq_b != ctx->iaoq_f + 4) { + /* If PSW[B] is set, the B,GATE insn would trap. */ + if (ctx->psw_xb & PSW_B) { goto do_sigill; } - switch (ctx->iaoq_f & -4) { + switch (ctx->base.pc_first) { case 0x00: /* Null pointer call */ gen_excp_1(EXCP_IMP); ctx->base.is_jmp = DISAS_NORETURN; @@ -2026,13 +2094,15 @@ static void do_page_zero(DisasContext *ctx) break; case 0xe0: /* SET_THREAD_POINTER */ - tcg_gen_st_i64(cpu_gr[26], tcg_env, offsetof(CPUHPPAState, cr[27])); - tmp = tcg_temp_new_i64(); - tcg_gen_ori_i64(tmp, cpu_gr[31], 3); - copy_iaoq_entry(ctx, cpu_iaoq_f, -1, tmp); - tcg_gen_addi_i64(tmp, tmp, 4); - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp); - ctx->base.is_jmp = DISAS_IAQ_N_UPDATED; + { + DisasIAQE next = { .base = tcg_temp_new_i64() }; + + tcg_gen_st_i64(cpu_gr[26], tcg_env, + offsetof(CPUHPPAState, cr[27])); + tcg_gen_ori_i64(next.base, cpu_gr[31], PRIV_USER); + install_iaq_entries(ctx, &next, NULL); + ctx->base.is_jmp = DISAS_IAQ_N_UPDATED; + } break; case 0x100: /* SYSCALL */ @@ -2051,7 +2121,7 @@ static void do_page_zero(DisasContext *ctx) static bool trans_nop(DisasContext *ctx, arg_nop *a) { - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2065,18 +2135,19 @@ static bool trans_sync(DisasContext *ctx, arg_sync *a) /* No point in nullifying the memory barrier. 
*/ tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } static bool trans_mfia(DisasContext *ctx, arg_mfia *a) { - unsigned rt = a->t; - TCGv_i64 tmp = dest_gpr(ctx, rt); - tcg_gen_movi_i64(tmp, ctx->iaoq_f & ~3ULL); - save_gpr(ctx, rt, tmp); + TCGv_i64 dest = dest_gpr(ctx, a->t); + + copy_iaoq_entry(ctx, dest, &ctx->iaq_f); + tcg_gen_andi_i64(dest, dest, -4); - cond_free(&ctx->null_cond); + save_gpr(ctx, a->t, dest); + ctx->null_cond = cond_make_f(); return true; } @@ -2091,7 +2162,7 @@ static bool trans_mfsp(DisasContext *ctx, arg_mfsp *a) save_gpr(ctx, rt, t0); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2136,7 +2207,7 @@ static bool trans_mfctl(DisasContext *ctx, arg_mfctl *a) save_gpr(ctx, rt, tmp); done: - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2176,7 +2247,7 @@ static bool trans_mtctl(DisasContext *ctx, arg_mtctl *a) tcg_gen_andi_i64(tmp, reg, ctx->is_pa20 ? 63 : 31); save_or_nullify(ctx, cpu_sar, tmp); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2250,7 +2321,7 @@ static bool trans_mtsarcm(DisasContext *ctx, arg_mtsarcm *a) tcg_gen_andi_i64(tmp, tmp, ctx->is_pa20 ? 63 : 31); save_or_nullify(ctx, cpu_sar, tmp); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2267,7 +2338,7 @@ static bool trans_ldsid(DisasContext *ctx, arg_ldsid *a) #endif save_gpr(ctx, a->t, dest); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2367,6 +2438,7 @@ static bool trans_halt(DisasContext *ctx, arg_halt *a) { CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR); #ifndef CONFIG_USER_ONLY + set_psw_xb(ctx, 0); nullify_over(ctx); gen_helper_halt(tcg_env); ctx->base.is_jmp = DISAS_NORETURN; @@ -2378,6 +2450,7 @@ static bool trans_reset(DisasContext *ctx, arg_reset *a) { CHECK_MOST_PRIVILEGED(EXCP_PRIV_OPR); #ifndef CONFIG_USER_ONLY + set_psw_xb(ctx, 0); nullify_over(ctx); gen_helper_reset(tcg_env); ctx->base.is_jmp = DISAS_NORETURN; @@ -2429,7 +2502,7 @@ static bool trans_nop_addrx(DisasContext *ctx, arg_ldst *a) tcg_gen_add_i64(dest, src1, src2); save_gpr(ctx, a->b, dest); } - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2671,7 +2744,7 @@ static bool trans_lci(DisasContext *ctx, arg_lci *a) since the entire address space is coherent. */ save_gpr(ctx, a->t, ctx->zero); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -2748,7 +2821,7 @@ static bool trans_or(DisasContext *ctx, arg_rrr_cf_d *a) unsigned rt = a->t; if (rt == 0) { /* NOP */ - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } if (r2 == 0) { /* COPY */ @@ -2759,7 +2832,7 @@ static bool trans_or(DisasContext *ctx, arg_rrr_cf_d *a) } else { save_gpr(ctx, rt, cpu_gr[r1]); } - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } #ifndef CONFIG_USER_ONLY @@ -2772,11 +2845,13 @@ static bool trans_or(DisasContext *ctx, arg_rrr_cf_d *a) if ((rt == 10 || rt == 31) && r1 == rt && r2 == rt) { /* PAUSE */ /* No need to check for supervisor, as userland can only pause until the next timer interrupt. */ + + set_psw_xb(ctx, 0); + nullify_over(ctx); /* Advance the instruction queue. 
*/ - copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b); - copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_n, ctx->iaoq_n_var); + install_iaq_entries(ctx, &ctx->iaq_b, NULL); nullify_set(ctx, 0); /* Tell the qemu main loop to halt until this cpu has work. */ @@ -2825,11 +2900,7 @@ static bool trans_uxor(DisasContext *ctx, arg_rrr_cf_d *a) tcg_gen_xor_i64(dest, tcg_r1, tcg_r2); save_gpr(ctx, a->t, dest); - cond_free(&ctx->null_cond); - if (a->cf) { - ctx->null_cond = do_unit_zero_cond(a->cf, a->d, dest); - } - + ctx->null_cond = do_unit_zero_cond(a->cf, a->d, dest); return nullify_end(ctx); } @@ -2855,7 +2926,7 @@ static bool do_uaddcm(DisasContext *ctx, arg_rrr_cf_d *a, bool is_tc) tcg_gen_subi_i64(tmp, tmp, 1); } save_gpr(ctx, a->t, tmp); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -3381,7 +3452,7 @@ static bool trans_ldil(DisasContext *ctx, arg_ldil *a) tcg_gen_movi_i64(tcg_rt, a->i); save_gpr(ctx, a->t, tcg_rt); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -3392,7 +3463,7 @@ static bool trans_addil(DisasContext *ctx, arg_addil *a) tcg_gen_addi_i64(tcg_r1, tcg_rt, a->i); save_gpr(ctx, 1, tcg_r1); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -3408,7 +3479,7 @@ static bool trans_ldo(DisasContext *ctx, arg_ldo *a) tcg_gen_addi_i64(tcg_rt, cpu_gr[a->b], a->i); } save_gpr(ctx, a->t, tcg_rt); - cond_free(&ctx->null_cond); + ctx->null_cond = cond_make_f(); return true; } @@ -3525,24 +3596,18 @@ static bool trans_bb_sar(DisasContext *ctx, arg_bb_sar *a) tcg_gen_shl_i64(tmp, tcg_r, tmp); } - cond = cond_make_0_tmp(a->c ? TCG_COND_GE : TCG_COND_LT, tmp); + cond = cond_make_ti(a->c ? TCG_COND_GE : TCG_COND_LT, tmp, 0); return do_cbranch(ctx, a->disp, a->n, &cond); } static bool trans_bb_imm(DisasContext *ctx, arg_bb_imm *a) { - TCGv_i64 tmp, tcg_r; DisasCond cond; - int p; + int p = a->p | (a->d ? 0 : 32); nullify_over(ctx); - - tmp = tcg_temp_new_i64(); - tcg_r = load_gpr(ctx, a->r); - p = a->p | (a->d ? 0 : 32); - tcg_gen_shli_i64(tmp, tcg_r, p); - - cond = cond_make_0(a->c ? TCG_COND_GE : TCG_COND_LT, tmp); + cond = cond_make_vi(a->c ? TCG_COND_TSTEQ : TCG_COND_TSTNE, + load_gpr(ctx, a->r), 1ull << (63 - p)); return do_cbranch(ctx, a->disp, a->n, &cond); } @@ -3640,10 +3705,7 @@ static bool trans_shrp_sar(DisasContext *ctx, arg_shrp_sar *a) save_gpr(ctx, a->t, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (a->c) { - ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); - } + ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); return nullify_end(ctx); } @@ -3683,10 +3745,7 @@ static bool trans_shrp_imm(DisasContext *ctx, arg_shrp_imm *a) save_gpr(ctx, a->t, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (a->c) { - ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); - } + ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); return nullify_end(ctx); } @@ -3728,10 +3787,7 @@ static bool trans_extr_sar(DisasContext *ctx, arg_extr_sar *a) save_gpr(ctx, a->t, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (a->c) { - ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); - } + ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); return nullify_end(ctx); } @@ -3764,10 +3820,7 @@ static bool trans_extr_imm(DisasContext *ctx, arg_extr_imm *a) save_gpr(ctx, a->t, dest); /* Install the new nullification. 
*/ - cond_free(&ctx->null_cond); - if (a->c) { - ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); - } + ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); return nullify_end(ctx); } @@ -3804,10 +3857,7 @@ static bool trans_depi_imm(DisasContext *ctx, arg_depi_imm *a) save_gpr(ctx, a->t, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (a->c) { - ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); - } + ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); return nullify_end(ctx); } @@ -3840,10 +3890,7 @@ static bool trans_dep_imm(DisasContext *ctx, arg_dep_imm *a) save_gpr(ctx, a->t, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (a->c) { - ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); - } + ctx->null_cond = do_sed_cond(ctx, a->c, a->d, dest); return nullify_end(ctx); } @@ -3877,10 +3924,7 @@ static bool do_dep_sar(DisasContext *ctx, unsigned rt, unsigned c, save_gpr(ctx, rt, dest); /* Install the new nullification. */ - cond_free(&ctx->null_cond); - if (c) { - ctx->null_cond = do_sed_cond(ctx, c, d, dest); - } + ctx->null_cond = do_sed_cond(ctx, c, d, dest); return nullify_end(ctx); } @@ -3910,104 +3954,53 @@ static bool trans_depi_sar(DisasContext *ctx, arg_depi_sar *a) static bool trans_be(DisasContext *ctx, arg_be *a) { - TCGv_i64 tmp; - -#ifdef CONFIG_USER_ONLY - /* ??? It seems like there should be a good way of using - "be disp(sr2, r0)", the canonical gateway entry mechanism - to our advantage. But that appears to be inconvenient to - manage along side branch delay slots. Therefore we handle - entry into the gateway page via absolute address. */ - /* Since we don't implement spaces, just branch. Do notice the special - case of "be disp(*,r0)" using a direct branch to disp, so that we can - goto_tb to the TB containing the syscall. 
*/ - if (a->b == 0) { - return do_dbranch(ctx, a->disp, a->l, a->n); - } -#else - nullify_over(ctx); +#ifndef CONFIG_USER_ONLY + ctx->iaq_j.space = tcg_temp_new_i64(); + load_spr(ctx, ctx->iaq_j.space, a->sp); #endif - tmp = tcg_temp_new_i64(); - tcg_gen_addi_i64(tmp, load_gpr(ctx, a->b), a->disp); - tmp = do_ibranch_priv(ctx, tmp); + ctx->iaq_j.base = tcg_temp_new_i64(); + ctx->iaq_j.disp = 0; -#ifdef CONFIG_USER_ONLY - return do_ibranch(ctx, tmp, a->l, a->n); -#else - TCGv_i64 new_spc = tcg_temp_new_i64(); + tcg_gen_addi_i64(ctx->iaq_j.base, load_gpr(ctx, a->b), a->disp); + ctx->iaq_j.base = do_ibranch_priv(ctx, ctx->iaq_j.base); - load_spr(ctx, new_spc, a->sp); - if (a->l) { - copy_iaoq_entry(ctx, cpu_gr[31], ctx->iaoq_n, ctx->iaoq_n_var); - tcg_gen_mov_i64(cpu_sr[0], cpu_iasq_b); - } - if (a->n && use_nullify_skip(ctx)) { - copy_iaoq_entry(ctx, cpu_iaoq_f, -1, tmp); - tcg_gen_addi_i64(tmp, tmp, 4); - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp); - tcg_gen_mov_i64(cpu_iasq_f, new_spc); - tcg_gen_mov_i64(cpu_iasq_b, cpu_iasq_f); - nullify_set(ctx, 0); - } else { - copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b); - if (ctx->iaoq_b == -1) { - tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b); - } - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, tmp); - tcg_gen_mov_i64(cpu_iasq_b, new_spc); - nullify_set(ctx, a->n); - } - tcg_gen_lookup_and_goto_ptr(); - ctx->base.is_jmp = DISAS_NORETURN; - return nullify_end(ctx); -#endif + return do_ibranch(ctx, a->l, true, a->n); } static bool trans_bl(DisasContext *ctx, arg_bl *a) { - return do_dbranch(ctx, iaoq_dest(ctx, a->disp), a->l, a->n); + return do_dbranch(ctx, a->disp, a->l, a->n); } static bool trans_b_gate(DisasContext *ctx, arg_b_gate *a) { - uint64_t dest = iaoq_dest(ctx, a->disp); - - nullify_over(ctx); + int64_t disp = a->disp; + bool indirect = false; - /* Make sure the caller hasn't done something weird with the queue. - * ??? This is not quite the same as the PSW[B] bit, which would be - * expensive to track. Real hardware will trap for - * b gateway - * b gateway+4 (in delay slot of first branch) - * However, checking for a non-sequential instruction queue *will* - * diagnose the security hole - * b gateway - * b evil - * in which instructions at evil would run with increased privs. - */ - if (ctx->iaoq_b == -1 || ctx->iaoq_b != ctx->iaoq_f + 4) { + /* Trap if PSW[B] is set. */ + if (ctx->psw_xb & PSW_B) { return gen_illegal(ctx); } + nullify_over(ctx); + #ifndef CONFIG_USER_ONLY - if (ctx->tb_flags & PSW_C) { - int type = hppa_artype_for_page(cpu_env(ctx->cs), ctx->base.pc_next); - /* If we could not find a TLB entry, then we need to generate an - ITLB miss exception so the kernel will provide it. - The resulting TLB fill operation will invalidate this TB and - we will re-translate, at which point we *will* be able to find - the TLB entry and determine if this is in fact a gateway page. */ - if (type < 0) { - gen_excp(ctx, EXCP_ITLB_MISS); - return true; - } - /* No change for non-gateway pages or for priv decrease. */ - if (type >= 4 && type - 4 < ctx->privilege) { - dest = deposit64(dest, 0, 2, type - 4); - } + if (ctx->privilege == 0) { + /* Privilege cannot decrease. */ + } else if (!(ctx->tb_flags & PSW_C)) { + /* With paging disabled, priv becomes 0. */ + disp -= ctx->privilege; } else { - dest &= -4; /* priv = 0 */ + /* Adjust the dest offset for the privilege change from the PTE. 
*/ + TCGv_i64 off = tcg_temp_new_i64(); + + copy_iaoq_entry(ctx, off, &ctx->iaq_f); + gen_helper_b_gate_priv(off, tcg_env, off); + + ctx->iaq_j.base = off; + ctx->iaq_j.disp = disp + 8; + indirect = true; } #endif @@ -4020,20 +4013,29 @@ static bool trans_b_gate(DisasContext *ctx, arg_b_gate *a) save_gpr(ctx, a->l, tmp); } - return do_dbranch(ctx, dest, 0, a->n); + if (indirect) { + return do_ibranch(ctx, 0, false, a->n); + } + return do_dbranch(ctx, disp, 0, a->n); } static bool trans_blr(DisasContext *ctx, arg_blr *a) { if (a->x) { - TCGv_i64 tmp = tcg_temp_new_i64(); - tcg_gen_shli_i64(tmp, load_gpr(ctx, a->x), 3); - tcg_gen_addi_i64(tmp, tmp, ctx->iaoq_f + 8); + DisasIAQE next = iaqe_incr(&ctx->iaq_f, 8); + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + /* The computation here never changes privilege level. */ - return do_ibranch(ctx, tmp, a->l, a->n); + copy_iaoq_entry(ctx, t0, &next); + tcg_gen_shli_i64(t1, load_gpr(ctx, a->x), 3); + tcg_gen_add_i64(t0, t0, t1); + + ctx->iaq_j = iaqe_next_absv(ctx, t0); + return do_ibranch(ctx, a->l, false, a->n); } else { /* BLR R0,RX is a good way to load PC+8 into RX. */ - return do_dbranch(ctx, ctx->iaoq_f + 8, a->l, a->n); + return do_dbranch(ctx, 0, a->l, a->n); } } @@ -4049,34 +4051,22 @@ static bool trans_bv(DisasContext *ctx, arg_bv *a) tcg_gen_add_i64(dest, dest, load_gpr(ctx, a->b)); } dest = do_ibranch_priv(ctx, dest); - return do_ibranch(ctx, dest, 0, a->n); + ctx->iaq_j = iaqe_next_absv(ctx, dest); + + return do_ibranch(ctx, 0, false, a->n); } static bool trans_bve(DisasContext *ctx, arg_bve *a) { - TCGv_i64 dest; + TCGv_i64 b = load_gpr(ctx, a->b); -#ifdef CONFIG_USER_ONLY - dest = do_ibranch_priv(ctx, load_gpr(ctx, a->b)); - return do_ibranch(ctx, dest, a->l, a->n); -#else - nullify_over(ctx); - dest = do_ibranch_priv(ctx, load_gpr(ctx, a->b)); - - copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_b, cpu_iaoq_b); - if (ctx->iaoq_b == -1) { - tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b); - } - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, dest); - tcg_gen_mov_i64(cpu_iasq_b, space_select(ctx, 0, dest)); - if (a->l) { - copy_iaoq_entry(ctx, cpu_gr[a->l], ctx->iaoq_n, ctx->iaoq_n_var); - } - nullify_set(ctx, a->n); - tcg_gen_lookup_and_goto_ptr(); - ctx->base.is_jmp = DISAS_NORETURN; - return nullify_end(ctx); +#ifndef CONFIG_USER_ONLY + ctx->iaq_j.space = space_select(ctx, 0, b); #endif + ctx->iaq_j.base = do_ibranch_priv(ctx, b); + ctx->iaq_j.disp = 0; + + return do_ibranch(ctx, a->l, false, a->n); } static bool trans_nopbts(DisasContext *ctx, arg_nopbts *a) @@ -4377,6 +4367,8 @@ static bool trans_fcmp_d(DisasContext *ctx, arg_fclass2 *a) static bool trans_ftest(DisasContext *ctx, arg_ftest *a) { + TCGCond tc = TCG_COND_TSTNE; + uint32_t mask; TCGv_i64 t; nullify_over(ctx); @@ -4385,55 +4377,41 @@ static bool trans_ftest(DisasContext *ctx, arg_ftest *a) tcg_gen_ld32u_i64(t, tcg_env, offsetof(CPUHPPAState, fr0_shadow)); if (a->y == 1) { - int mask; - bool inv = false; - switch (a->c) { case 0: /* simple */ - tcg_gen_andi_i64(t, t, 0x4000000); - ctx->null_cond = cond_make_0(TCG_COND_NE, t); - goto done; + mask = R_FPSR_C_MASK; + break; case 2: /* rej */ - inv = true; + tc = TCG_COND_TSTEQ; /* fallthru */ case 1: /* acc */ - mask = 0x43ff800; + mask = R_FPSR_C_MASK | R_FPSR_CQ_MASK; break; case 6: /* rej8 */ - inv = true; + tc = TCG_COND_TSTEQ; /* fallthru */ case 5: /* acc8 */ - mask = 0x43f8000; + mask = R_FPSR_C_MASK | R_FPSR_CQ0_6_MASK; break; case 9: /* acc6 */ - mask = 0x43e0000; + mask = R_FPSR_C_MASK | R_FPSR_CQ0_4_MASK; 
break; case 13: /* acc4 */ - mask = 0x4380000; + mask = R_FPSR_C_MASK | R_FPSR_CQ0_2_MASK; break; case 17: /* acc2 */ - mask = 0x4200000; + mask = R_FPSR_C_MASK | R_FPSR_CQ0_MASK; break; default: gen_illegal(ctx); return true; } - if (inv) { - TCGv_i64 c = tcg_constant_i64(mask); - tcg_gen_or_i64(t, t, c); - ctx->null_cond = cond_make(TCG_COND_EQ, t, c); - } else { - tcg_gen_andi_i64(t, t, mask); - ctx->null_cond = cond_make_0(TCG_COND_EQ, t); - } } else { unsigned cbit = (a->y ^ 1) - 1; - - tcg_gen_extract_i64(t, t, 21 - cbit, 1); - ctx->null_cond = cond_make_0(TCG_COND_NE, t); + mask = R_FPSR_CA0_MASK >> cbit; } - done: + ctx->null_cond = cond_make_ti(tc, t, mask); return nullify_end(ctx); } @@ -4639,34 +4617,38 @@ static bool trans_diag_unimp(DisasContext *ctx, arg_diag_unimp *a) static void hppa_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + uint64_t cs_base; int bound; ctx->cs = cs; ctx->tb_flags = ctx->base.tb->flags; ctx->is_pa20 = hppa_is_pa20(cpu_env(cs)); + ctx->psw_xb = ctx->tb_flags & (PSW_X | PSW_B); #ifdef CONFIG_USER_ONLY - ctx->privilege = MMU_IDX_TO_PRIV(MMU_USER_IDX); + ctx->privilege = PRIV_USER; ctx->mmu_idx = MMU_USER_IDX; - ctx->iaoq_f = ctx->base.pc_first | ctx->privilege; - ctx->iaoq_b = ctx->base.tb->cs_base | ctx->privilege; ctx->unalign = (ctx->tb_flags & TB_FLAG_UNALIGN ? MO_UNALN : MO_ALIGN); #else ctx->privilege = (ctx->tb_flags >> TB_FLAG_PRIV_SHIFT) & 3; ctx->mmu_idx = (ctx->tb_flags & PSW_D ? PRIV_P_TO_MMU_IDX(ctx->privilege, ctx->tb_flags & PSW_P) : ctx->tb_flags & PSW_W ? MMU_ABS_W_IDX : MMU_ABS_IDX); +#endif - /* Recover the IAOQ values from the GVA + PRIV. */ - uint64_t cs_base = ctx->base.tb->cs_base; - uint64_t iasq_f = cs_base & ~0xffffffffull; - int32_t diff = cs_base; + cs_base = ctx->base.tb->cs_base; + ctx->iaoq_first = ctx->base.pc_first + ctx->privilege; - ctx->iaoq_f = (ctx->base.pc_first & ~iasq_f) + ctx->privilege; - ctx->iaoq_b = (diff ? ctx->iaoq_f + diff : -1); -#endif - ctx->iaoq_n = -1; - ctx->iaoq_n_var = NULL; + if (unlikely(cs_base & CS_BASE_DIFFSPACE)) { + ctx->iaq_b.space = cpu_iasq_b; + ctx->iaq_b.base = cpu_iaoq_b; + } else if (unlikely(cs_base & CS_BASE_DIFFPAGE)) { + ctx->iaq_b.base = cpu_iaoq_b; + } else { + uint64_t iaoq_f_pgofs = ctx->iaoq_first & ~TARGET_PAGE_MASK; + uint64_t iaoq_b_pgofs = cs_base & ~TARGET_PAGE_MASK; + ctx->iaq_b.disp = iaoq_b_pgofs - iaoq_f_pgofs; + } ctx->zero = tcg_constant_i64(0); @@ -4692,8 +4674,23 @@ static void hppa_tr_tb_start(DisasContextBase *dcbase, CPUState *cs) static void hppa_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); + uint64_t iaoq_f, iaoq_b; + int64_t diff; + + tcg_debug_assert(!iaqe_variable(&ctx->iaq_f)); + + iaoq_f = ctx->iaoq_first + ctx->iaq_f.disp; + if (iaqe_variable(&ctx->iaq_b)) { + diff = INT32_MIN; + } else { + iaoq_b = ctx->iaoq_first + ctx->iaq_b.disp; + diff = iaoq_b - iaoq_f; + /* Direct branches can only produce a 24-bit displacement. */ + tcg_debug_assert(diff == (int32_t)diff); + tcg_debug_assert(diff != INT32_MIN); + } - tcg_gen_insn_start(ctx->iaoq_f, ctx->iaoq_b, 0); + tcg_gen_insn_start(iaoq_f & ~TARGET_PAGE_MASK, diff, 0); ctx->insn_start_updated = false; } @@ -4716,16 +4713,13 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) the page permissions for execute. */ uint32_t insn = translator_ldl(env, &ctx->base, ctx->base.pc_next); - /* Set up the IA queue for the next insn. 
- This will be overwritten by a branch. */ - if (ctx->iaoq_b == -1) { - ctx->iaoq_n = -1; - ctx->iaoq_n_var = tcg_temp_new_i64(); - tcg_gen_addi_i64(ctx->iaoq_n_var, cpu_iaoq_b, 4); - } else { - ctx->iaoq_n = ctx->iaoq_b + 4; - ctx->iaoq_n_var = NULL; - } + /* + * Set up the IA queue for the next insn. + * This will be overwritten by a branch. + */ + ctx->iaq_n = NULL; + memset(&ctx->iaq_j, 0, sizeof(ctx->iaq_j)); + ctx->psw_b_next = false; if (unlikely(ctx->null_cond.c == TCG_COND_ALWAYS)) { ctx->null_cond.c = TCG_COND_NEVER; @@ -4738,51 +4732,47 @@ static void hppa_tr_translate_insn(DisasContextBase *dcbase, CPUState *cs) ret = ctx->base.is_jmp; assert(ctx->null_lab == NULL); } - } - /* Advance the insn queue. Note that this check also detects - a priority change within the instruction queue. */ - if (ret == DISAS_NEXT && ctx->iaoq_b != ctx->iaoq_f + 4) { - if (ctx->iaoq_b != -1 && ctx->iaoq_n != -1 - && use_goto_tb(ctx, ctx->iaoq_b) - && (ctx->null_cond.c == TCG_COND_NEVER - || ctx->null_cond.c == TCG_COND_ALWAYS)) { - nullify_set(ctx, ctx->null_cond.c == TCG_COND_ALWAYS); - gen_goto_tb(ctx, 0, ctx->iaoq_b, ctx->iaoq_n); - ctx->base.is_jmp = ret = DISAS_NORETURN; - } else { - ctx->base.is_jmp = ret = DISAS_IAQ_N_STALE; + if (ret != DISAS_NORETURN) { + set_psw_xb(ctx, ctx->psw_b_next ? PSW_B : 0); } } - ctx->iaoq_f = ctx->iaoq_b; - ctx->iaoq_b = ctx->iaoq_n; - ctx->base.pc_next += 4; - switch (ret) { - case DISAS_NORETURN: - case DISAS_IAQ_N_UPDATED: - break; - - case DISAS_NEXT: - case DISAS_IAQ_N_STALE: - case DISAS_IAQ_N_STALE_EXIT: - if (ctx->iaoq_f == -1) { - copy_iaoq_entry(ctx, cpu_iaoq_f, -1, cpu_iaoq_b); - copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_n, ctx->iaoq_n_var); -#ifndef CONFIG_USER_ONLY - tcg_gen_mov_i64(cpu_iasq_f, cpu_iasq_b); -#endif - nullify_save(ctx); - ctx->base.is_jmp = (ret == DISAS_IAQ_N_STALE_EXIT - ? DISAS_EXIT - : DISAS_IAQ_N_UPDATED); - } else if (ctx->iaoq_b == -1) { - copy_iaoq_entry(ctx, cpu_iaoq_b, -1, ctx->iaoq_n_var); - } - break; + /* If the TranslationBlock must end, do so. */ + ctx->base.pc_next += 4; + if (ret != DISAS_NEXT) { + return; + } + /* Note this also detects a priority change. */ + if (iaqe_variable(&ctx->iaq_b) + || ctx->iaq_b.disp != ctx->iaq_f.disp + 4) { + ctx->base.is_jmp = DISAS_IAQ_N_STALE; + return; + } - default: - g_assert_not_reached(); + /* + * Advance the insn queue. + * The only exit now is DISAS_TOO_MANY from the translator loop. + */ + ctx->iaq_f.disp = ctx->iaq_b.disp; + if (!ctx->iaq_n) { + ctx->iaq_b.disp += 4; + return; + } + /* + * If IAQ_Next is variable in any way, we need to copy into the + * IAQ_Back globals, in case the next insn raises an exception. + */ + if (ctx->iaq_n->base) { + copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaq_n); + ctx->iaq_b.base = cpu_iaoq_b; + ctx->iaq_b.disp = 0; + } else { + ctx->iaq_b.disp = ctx->iaq_n->disp; + } + if (ctx->iaq_n->space) { + tcg_gen_mov_i64(cpu_iasq_b, ctx->iaq_n->space); + ctx->iaq_b.space = cpu_iasq_b; } } @@ -4790,56 +4780,82 @@ static void hppa_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) { DisasContext *ctx = container_of(dcbase, DisasContext, base); DisasJumpType is_jmp = ctx->base.is_jmp; + /* Assume the insn queue has not been advanced. */ + DisasIAQE *f = &ctx->iaq_b; + DisasIAQE *b = ctx->iaq_n; switch (is_jmp) { case DISAS_NORETURN: break; case DISAS_TOO_MANY: + /* The insn queue has not been advanced. 
*/ + f = &ctx->iaq_f; + b = &ctx->iaq_b; + /* FALLTHRU */ case DISAS_IAQ_N_STALE: + if (use_goto_tb(ctx, f, b) + && (ctx->null_cond.c == TCG_COND_NEVER + || ctx->null_cond.c == TCG_COND_ALWAYS)) { + nullify_set(ctx, ctx->null_cond.c == TCG_COND_ALWAYS); + gen_goto_tb(ctx, 0, f, b); + break; + } + /* FALLTHRU */ case DISAS_IAQ_N_STALE_EXIT: - copy_iaoq_entry(ctx, cpu_iaoq_f, ctx->iaoq_f, cpu_iaoq_f); - copy_iaoq_entry(ctx, cpu_iaoq_b, ctx->iaoq_b, cpu_iaoq_b); + install_iaq_entries(ctx, f, b); nullify_save(ctx); - /* FALLTHRU */ - case DISAS_IAQ_N_UPDATED: - if (is_jmp != DISAS_IAQ_N_STALE_EXIT) { - tcg_gen_lookup_and_goto_ptr(); + if (is_jmp == DISAS_IAQ_N_STALE_EXIT) { + tcg_gen_exit_tb(NULL, 0); break; } /* FALLTHRU */ + case DISAS_IAQ_N_UPDATED: + tcg_gen_lookup_and_goto_ptr(); + break; case DISAS_EXIT: tcg_gen_exit_tb(NULL, 0); break; default: g_assert_not_reached(); } + + for (DisasDelayException *e = ctx->delay_excp_list; e ; e = e->next) { + gen_set_label(e->lab); + if (e->set_n >= 0) { + tcg_gen_movi_i64(cpu_psw_n, e->set_n); + } + if (e->set_iir) { + tcg_gen_st_i64(tcg_constant_i64(e->insn), tcg_env, + offsetof(CPUHPPAState, cr[CR_IIR])); + } + install_iaq_entries(ctx, &e->iaq_f, &e->iaq_b); + gen_excp_1(e->excp); + } } -static void hppa_tr_disas_log(const DisasContextBase *dcbase, +#ifdef CONFIG_USER_ONLY +static bool hppa_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs, FILE *logfile) { target_ulong pc = dcbase->pc_first; -#ifdef CONFIG_USER_ONLY switch (pc) { case 0x00: fprintf(logfile, "IN:\n0x00000000: (null)\n"); - return; + return true; case 0xb0: fprintf(logfile, "IN:\n0x000000b0: light-weight-syscall\n"); - return; + return true; case 0xe0: fprintf(logfile, "IN:\n0x000000e0: set-thread-pointer-syscall\n"); - return; + return true; case 0x100: fprintf(logfile, "IN:\n0x00000100: syscall\n"); - return; + return true; } -#endif - - fprintf(logfile, "IN: %s\n", lookup_symbol(pc)); - target_disas(logfile, cs, pc, dcbase->tb->size); + return false; } +#endif static const TranslatorOps hppa_tr_ops = { .init_disas_context = hppa_tr_init_disas_context, @@ -4847,12 +4863,14 @@ static const TranslatorOps hppa_tr_ops = { .insn_start = hppa_tr_insn_start, .translate_insn = hppa_tr_translate_insn, .tb_stop = hppa_tr_tb_stop, +#ifdef CONFIG_USER_ONLY .disas_log = hppa_tr_disas_log, +#endif }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, vaddr pc, void *host_pc) { - DisasContext ctx; + DisasContext ctx = { }; translator_loop(cs, tb, max_insns, pc, host_pc, &hppa_tr_ops, &ctx.base); } diff --git a/target/i386/Kconfig b/target/i386/Kconfig index ce6968906e..6b0feef029 100644 --- a/target/i386/Kconfig +++ b/target/i386/Kconfig @@ -1,5 +1,9 @@ config I386 bool + select APIC + # kvm_arch_fixup_msi_route() needs to access PCIDevice + select PCI if KVM config X86_64 bool + select I386 diff --git a/target/i386/cpu-apic.c b/target/i386/cpu-apic.c new file mode 100644 index 0000000000..d397ec94dc --- /dev/null +++ b/target/i386/cpu-apic.c @@ -0,0 +1,112 @@ +/* + * QEMU x86 CPU <-> APIC + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * SPDX-License-Identifier: MIT + */ + +#include "qemu/osdep.h" +#include "qapi/qmp/qdict.h" +#include "qapi/error.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "sysemu/hw_accel.h" +#include "sysemu/kvm.h" +#include "sysemu/xen.h" +#include "exec/address-spaces.h" +#include "hw/qdev-properties.h" +#include "hw/i386/apic_internal.h" +#include "cpu-internal.h" + +APICCommonClass 
*apic_get_class(Error **errp) +{ + const char *apic_type = "apic"; + + /* TODO: in-kernel irqchip for hvf */ + if (kvm_enabled()) { + if (!kvm_irqchip_in_kernel()) { + error_setg(errp, "KVM does not support userspace APIC"); + return NULL; + } + apic_type = "kvm-apic"; + } else if (xen_enabled()) { + apic_type = "xen-apic"; + } else if (whpx_apic_in_platform()) { + apic_type = "whpx-apic"; + } + + return APIC_COMMON_CLASS(object_class_by_name(apic_type)); +} + +void x86_cpu_apic_create(X86CPU *cpu, Error **errp) +{ + APICCommonState *apic; + APICCommonClass *apic_class = apic_get_class(errp); + + if (!apic_class) { + return; + } + + cpu->apic_state = DEVICE(object_new_with_class(OBJECT_CLASS(apic_class))); + object_property_add_child(OBJECT(cpu), "lapic", + OBJECT(cpu->apic_state)); + object_unref(OBJECT(cpu->apic_state)); + + /* TODO: convert to link<> */ + apic = APIC_COMMON(cpu->apic_state); + apic->cpu = cpu; + apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; + + /* + * apic_common_set_id needs to check if the CPU has x2APIC + * feature in case APIC ID >= 255, so we need to set apic->cpu + * before setting APIC ID + */ + qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); +} + +void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) +{ + APICCommonState *apic; + static bool apic_mmio_map_once; + + if (cpu->apic_state == NULL) { + return; + } + qdev_realize(DEVICE(cpu->apic_state), NULL, errp); + + /* Map APIC MMIO area */ + apic = APIC_COMMON(cpu->apic_state); + if (!apic_mmio_map_once) { + memory_region_add_subregion_overlap(get_system_memory(), + apic->apicbase & + MSR_IA32_APICBASE_BASE, + &apic->io_memory, + 0x1000); + apic_mmio_map_once = true; + } +} + +void hmp_info_local_apic(Monitor *mon, const QDict *qdict) +{ + CPUState *cs; + + if (qdict_haskey(qdict, "apic-id")) { + int id = qdict_get_try_int(qdict, "apic-id", 0); + + cs = cpu_by_arch_id(id); + if (cs) { + cpu_synchronize_state(cs); + } + } else { + cs = mon_get_cpu(mon); + } + + + if (!cs) { + monitor_printf(mon, "No CPU available\n"); + return; + } + x86_cpu_dump_local_apic_state(cs, CPU_DUMP_FPU); +} diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h index 911b4cd51b..5e15335203 100644 --- a/target/i386/cpu-param.h +++ b/target/i386/cpu-param.h @@ -24,4 +24,7 @@ #endif #define TARGET_PAGE_BITS 12 +/* The x86 has a strong memory model with some store-after-load re-ordering */ +#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) + #endif diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c index 3f9093d285..227ac021f6 100644 --- a/target/i386/cpu-sysemu.c +++ b/target/i386/cpu-sysemu.c @@ -19,19 +19,12 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "sysemu/kvm.h" -#include "sysemu/xen.h" -#include "sysemu/whpx.h" #include "qapi/error.h" #include "qapi/qapi-visit-run-state.h" #include "qapi/qmp/qdict.h" #include "qapi/qobject-input-visitor.h" #include "qom/qom-qobject.h" #include "qapi/qapi-commands-machine-target.h" -#include "hw/qdev-properties.h" - -#include "exec/address-spaces.h" -#include "hw/i386/apic_internal.h" #include "cpu-internal.h" @@ -273,75 +266,6 @@ void x86_cpu_machine_reset_cb(void *opaque) cpu_reset(CPU(cpu)); } -APICCommonClass *apic_get_class(Error **errp) -{ - const char *apic_type = "apic"; - - /* TODO: in-kernel irqchip for hvf */ - if (kvm_enabled()) { - if (!kvm_irqchip_in_kernel()) { - error_setg(errp, "KVM does not support userspace APIC"); - return NULL; - } - apic_type = "kvm-apic"; - } else if (xen_enabled()) { - apic_type = 
"xen-apic"; - } else if (whpx_apic_in_platform()) { - apic_type = "whpx-apic"; - } - - return APIC_COMMON_CLASS(object_class_by_name(apic_type)); -} - -void x86_cpu_apic_create(X86CPU *cpu, Error **errp) -{ - APICCommonState *apic; - APICCommonClass *apic_class = apic_get_class(errp); - - if (!apic_class) { - return; - } - - cpu->apic_state = DEVICE(object_new_with_class(OBJECT_CLASS(apic_class))); - object_property_add_child(OBJECT(cpu), "lapic", - OBJECT(cpu->apic_state)); - object_unref(OBJECT(cpu->apic_state)); - - /* TODO: convert to link<> */ - apic = APIC_COMMON(cpu->apic_state); - apic->cpu = cpu; - apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; - - /* - * apic_common_set_id needs to check if the CPU has x2APIC - * feature in case APIC ID >= 255, so we need to set apic->cpu - * before setting APIC ID - */ - qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); -} - -void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) -{ - APICCommonState *apic; - static bool apic_mmio_map_once; - - if (cpu->apic_state == NULL) { - return; - } - qdev_realize(DEVICE(cpu->apic_state), NULL, errp); - - /* Map APIC MMIO area */ - apic = APIC_COMMON(cpu->apic_state); - if (!apic_mmio_map_once) { - memory_region_add_subregion_overlap(get_system_memory(), - apic->apicbase & - MSR_IA32_APICBASE_BASE, - &apic->io_memory, - 0x1000); - apic_mmio_map_once = true; - } -} - GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); @@ -385,4 +309,3 @@ void x86_cpu_get_crash_info_qom(Object *obj, Visitor *v, errp); qapi_free_GuestPanicInformation(panic_info); } - diff --git a/target/i386/cpu.c b/target/i386/cpu.c index fa1ea3735d..bc2dceb647 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -235,22 +235,53 @@ static uint8_t cpuid2_cache_descriptor(CPUCacheInfo *cache) ((t) == UNIFIED_CACHE) ? CACHE_TYPE_UNIFIED : \ 0 /* Invalid value */) +static uint32_t max_thread_ids_for_cache(X86CPUTopoInfo *topo_info, + enum CPUTopoLevel share_level) +{ + uint32_t num_ids = 0; + + switch (share_level) { + case CPU_TOPO_LEVEL_CORE: + num_ids = 1 << apicid_core_offset(topo_info); + break; + case CPU_TOPO_LEVEL_DIE: + num_ids = 1 << apicid_die_offset(topo_info); + break; + case CPU_TOPO_LEVEL_PACKAGE: + num_ids = 1 << apicid_pkg_offset(topo_info); + break; + default: + /* + * Currently there is no use case for SMT and MODULE, so use + * assert directly to facilitate debugging. + */ + g_assert_not_reached(); + } + + return num_ids - 1; +} + +static uint32_t max_core_ids_in_package(X86CPUTopoInfo *topo_info) +{ + uint32_t num_cores = 1 << (apicid_pkg_offset(topo_info) - + apicid_core_offset(topo_info)); + return num_cores - 1; +} /* Encode cache info for CPUID[4] */ static void encode_cache_cpuid4(CPUCacheInfo *cache, - int num_apic_ids, int num_cores, + X86CPUTopoInfo *topo_info, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); - assert(num_apic_ids > 0); *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0) | - ((num_cores - 1) << 26) | - ((num_apic_ids - 1) << 14); + (max_core_ids_in_package(topo_info) << 26) | + (max_thread_ids_for_cache(topo_info, cache->share_level) << 14); assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -269,6 +300,122 @@ static void encode_cache_cpuid4(CPUCacheInfo *cache, (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); } +static uint32_t num_threads_by_topo_level(X86CPUTopoInfo *topo_info, + enum CPUTopoLevel topo_level) +{ + switch (topo_level) { + case CPU_TOPO_LEVEL_SMT: + return 1; + case CPU_TOPO_LEVEL_CORE: + return topo_info->threads_per_core; + case CPU_TOPO_LEVEL_MODULE: + return topo_info->threads_per_core * topo_info->cores_per_module; + case CPU_TOPO_LEVEL_DIE: + return topo_info->threads_per_core * topo_info->cores_per_module * + topo_info->modules_per_die; + case CPU_TOPO_LEVEL_PACKAGE: + return topo_info->threads_per_core * topo_info->cores_per_module * + topo_info->modules_per_die * topo_info->dies_per_pkg; + default: + g_assert_not_reached(); + } + return 0; +} + +static uint32_t apicid_offset_by_topo_level(X86CPUTopoInfo *topo_info, + enum CPUTopoLevel topo_level) +{ + switch (topo_level) { + case CPU_TOPO_LEVEL_SMT: + return 0; + case CPU_TOPO_LEVEL_CORE: + return apicid_core_offset(topo_info); + case CPU_TOPO_LEVEL_MODULE: + return apicid_module_offset(topo_info); + case CPU_TOPO_LEVEL_DIE: + return apicid_die_offset(topo_info); + case CPU_TOPO_LEVEL_PACKAGE: + return apicid_pkg_offset(topo_info); + default: + g_assert_not_reached(); + } + return 0; +} + +static uint32_t cpuid1f_topo_type(enum CPUTopoLevel topo_level) +{ + switch (topo_level) { + case CPU_TOPO_LEVEL_INVALID: + return CPUID_1F_ECX_TOPO_LEVEL_INVALID; + case CPU_TOPO_LEVEL_SMT: + return CPUID_1F_ECX_TOPO_LEVEL_SMT; + case CPU_TOPO_LEVEL_CORE: + return CPUID_1F_ECX_TOPO_LEVEL_CORE; + case CPU_TOPO_LEVEL_MODULE: + return CPUID_1F_ECX_TOPO_LEVEL_MODULE; + case CPU_TOPO_LEVEL_DIE: + return CPUID_1F_ECX_TOPO_LEVEL_DIE; + default: + /* Other types are not supported in QEMU. */ + g_assert_not_reached(); + } + return 0; +} + +static void encode_topo_cpuid1f(CPUX86State *env, uint32_t count, + X86CPUTopoInfo *topo_info, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + X86CPU *cpu = env_archcpu(env); + unsigned long level, next_level; + uint32_t num_threads_next_level, offset_next_level; + + assert(count + 1 < CPU_TOPO_LEVEL_MAX); + + /* + * Find the No.(count + 1) topology level in avail_cpu_topo bitmap. + * The search starts from bit 1 (CPU_TOPO_LEVEL_INVALID + 1). + */ + level = CPU_TOPO_LEVEL_INVALID; + for (int i = 0; i <= count; i++) { + level = find_next_bit(env->avail_cpu_topo, + CPU_TOPO_LEVEL_PACKAGE, + level + 1); + + /* + * CPUID[0x1f] doesn't explicitly encode the package level, + * and it just encodes the invalid level (all fields are 0) + * into the last subleaf of 0x1f. + */ + if (level == CPU_TOPO_LEVEL_PACKAGE) { + level = CPU_TOPO_LEVEL_INVALID; + break; + } + } + + if (level == CPU_TOPO_LEVEL_INVALID) { + num_threads_next_level = 0; + offset_next_level = 0; + } else { + next_level = find_next_bit(env->avail_cpu_topo, + CPU_TOPO_LEVEL_PACKAGE, + level + 1); + num_threads_next_level = num_threads_by_topo_level(topo_info, + next_level); + offset_next_level = apicid_offset_by_topo_level(topo_info, + next_level); + } + + *eax = offset_next_level; + /* The count (bits 15-00) doesn't need to be reliable. 
*/ + *ebx = num_threads_next_level & 0xffff; + *ecx = (count & 0xff) | (cpuid1f_topo_type(level) << 8); + *edx = cpu->apic_id; + + assert(!(*eax & ~0x1f)); +} + /* Encode cache info for CPUID[0x80000005].ECX or CPUID[0x80000005].EDX */ static uint32_t encode_cache_cpuid80000005(CPUCacheInfo *cache) { @@ -331,20 +478,12 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { - uint32_t l3_threads; assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); *eax = CACHE_TYPE(cache->type) | CACHE_LEVEL(cache->level) | (cache->self_init ? CACHE_SELF_INIT_LEVEL : 0); - - /* L3 is shared among multiple cores */ - if (cache->level == 3) { - l3_threads = topo_info->cores_per_die * topo_info->threads_per_core; - *eax |= (l3_threads - 1) << 14; - } else { - *eax |= ((topo_info->threads_per_core - 1) << 14); - } + *eax |= max_thread_ids_for_cache(topo_info, cache->share_level) << 14; assert(cache->line_size > 0); assert(cache->partitions > 0); @@ -398,12 +537,9 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, * 31:11 Reserved. * 10:8 NodesPerProcessor: Node per processor. Read-only. Reset: XXXb. * ValidValues: - * Value Description - * 000b 1 node per processor. - * 001b 2 nodes per processor. - * 010b Reserved. - * 011b 4 nodes per processor. - * 111b-100b Reserved. + * Value Description + * 0h 1 node per processor. + * 7h-1h Reserved. * 7:0 NodeId: Node ID. Read-only. Reset: XXh. * * NOTE: Hardware reserves 3 bits for number of nodes per processor. @@ -412,8 +548,12 @@ static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, * NodeId is combination of node and socket_id which is already decoded * in apic_id. Just use it by shifting. 
*/ - *ecx = ((topo_info->dies_per_pkg - 1) << 8) | - ((cpu->apic_id >> apicid_die_offset(topo_info)) & 0xFF); + if (cpu->legacy_multi_node) { + *ecx = ((topo_info->dies_per_pkg - 1) << 8) | + ((cpu->apic_id >> apicid_die_offset(topo_info)) & 0xFF); + } else { + *ecx = (cpu->apic_id >> apicid_pkg_offset(topo_info)) & 0xFF; + } *edx = 0; } @@ -435,6 +575,7 @@ static CPUCacheInfo legacy_l1d_cache = { .sets = 64, .partitions = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }; /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ @@ -449,6 +590,7 @@ static CPUCacheInfo legacy_l1d_cache_amd = { .partitions = 1, .lines_per_tag = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }; /* L1 instruction cache: */ @@ -462,6 +604,7 @@ static CPUCacheInfo legacy_l1i_cache = { .sets = 64, .partitions = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }; /*FIXME: CPUID leaf 0x80000005 is inconsistent with leaves 2 & 4 */ @@ -476,6 +619,7 @@ static CPUCacheInfo legacy_l1i_cache_amd = { .partitions = 1, .lines_per_tag = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }; /* Level 2 unified cache: */ @@ -489,6 +633,7 @@ static CPUCacheInfo legacy_l2_cache = { .sets = 4096, .partitions = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }; /*FIXME: CPUID leaf 2 descriptor is inconsistent with CPUID leaf 4 */ @@ -498,6 +643,7 @@ static CPUCacheInfo legacy_l2_cache_cpuid2 = { .size = 2 * MiB, .line_size = 64, .associativity = 8, + .share_level = CPU_TOPO_LEVEL_INVALID, }; @@ -511,6 +657,7 @@ static CPUCacheInfo legacy_l2_cache_amd = { .associativity = 16, .sets = 512, .partitions = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }; /* Level 3 unified cache: */ @@ -526,6 +673,7 @@ static CPUCacheInfo legacy_l3_cache = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = CPU_TOPO_LEVEL_DIE, }; /* TLB definitions: */ @@ -712,7 +860,7 @@ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, #endif #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP | \ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \ - CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT | \ + CPUID_7_0_EBX_CLFLUSHOPT | \ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_FSGSBASE | \ CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_RDSEED | \ CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_KERNEL_FEATURES) @@ -968,7 +1116,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "amx-fp16", NULL, "avx-ifma", - NULL, NULL, NULL, NULL, + NULL, NULL, "lam", NULL, NULL, NULL, NULL, NULL, }, .cpuid = { @@ -1550,8 +1698,8 @@ static FeatureDep feature_dependencies[] = { .to = { FEAT_SVM, ~0ull }, }, { - .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, - .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, + .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, }, }; @@ -1824,6 +1972,7 @@ static const CPUCaches epyc_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1836,6 +1985,7 @@ static const CPUCaches epyc_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1846,6 +1996,7 @@ static const CPUCaches 
epyc_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1859,6 +2010,7 @@ static const CPUCaches epyc_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -1874,6 +2026,7 @@ static CPUCaches epyc_v4_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1886,6 +2039,7 @@ static CPUCaches epyc_v4_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1896,6 +2050,7 @@ static CPUCaches epyc_v4_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1909,6 +2064,7 @@ static CPUCaches epyc_v4_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -1924,6 +2080,7 @@ static const CPUCaches epyc_rome_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1936,6 +2093,7 @@ static const CPUCaches epyc_rome_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1946,6 +2104,7 @@ static const CPUCaches epyc_rome_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1959,6 +2118,7 @@ static const CPUCaches epyc_rome_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -1974,6 +2134,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -1986,6 +2147,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -1996,6 +2158,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2009,6 +2172,7 @@ static const CPUCaches epyc_rome_v3_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -2024,6 +2188,7 @@ static const CPUCaches epyc_milan_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -2036,6 +2201,7 @@ static const CPUCaches epyc_milan_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2046,6 +2212,7 @@ static const CPUCaches epyc_milan_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2059,6 +2226,7 @@ 
static const CPUCaches epyc_milan_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = true, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -2074,6 +2242,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -2086,6 +2255,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2096,6 +2266,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .partitions = 1, .sets = 1024, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2109,6 +2280,7 @@ static const CPUCaches epyc_milan_v2_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -2124,6 +2296,7 @@ static const CPUCaches epyc_genoa_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l1i_cache = &(CPUCacheInfo) { .type = INSTRUCTION_CACHE, @@ -2136,6 +2309,7 @@ static const CPUCaches epyc_genoa_cache_info = { .lines_per_tag = 1, .self_init = 1, .no_invd_sharing = true, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l2_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2146,6 +2320,7 @@ static const CPUCaches epyc_genoa_cache_info = { .partitions = 1, .sets = 2048, .lines_per_tag = 1, + .share_level = CPU_TOPO_LEVEL_CORE, }, .l3_cache = &(CPUCacheInfo) { .type = UNIFIED_CACHE, @@ -2159,6 +2334,7 @@ static const CPUCaches epyc_genoa_cache_info = { .self_init = true, .inclusive = true, .complex_indexing = false, + .share_level = CPU_TOPO_LEVEL_DIE, }, }; @@ -3970,6 +4146,17 @@ static const X86CPUDefinition builtin_x86_defs[] = { { /* end of list */ } } }, + { + .version = 3, + .props = (PropValue[]) { + { "ss", "on" }, + { "tsc-adjust", "on" }, + { "cldemote", "on" }, + { "movdiri", "on" }, + { "movdir64b", "on" }, + { /* end of list */ } + } + }, { /* end of list */ } } }, @@ -5708,7 +5895,7 @@ static void x86_cpu_list_entry(gpointer data, gpointer user_data) desc = g_strdup_printf("%s (deprecated)", olddesc); } - qemu_printf("x86 %-20s %s\n", name, desc); + qemu_printf(" %-20s %s\n", name, desc); } /* list available CPU models and flags */ @@ -6150,15 +6337,21 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, { X86CPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); - uint32_t die_offset; uint32_t limit; uint32_t signature[3]; X86CPUTopoInfo topo_info; + uint32_t cores_per_pkg; + uint32_t threads_per_pkg; topo_info.dies_per_pkg = env->nr_dies; - topo_info.cores_per_die = cs->nr_cores / env->nr_dies; + topo_info.modules_per_die = env->nr_modules; + topo_info.cores_per_module = cs->nr_cores / env->nr_dies / env->nr_modules; topo_info.threads_per_core = cs->nr_threads; + cores_per_pkg = topo_info.cores_per_module * topo_info.modules_per_die * + topo_info.dies_per_pkg; + threads_per_pkg = cores_per_pkg * topo_info.threads_per_core; + /* Calculate & apply limits for different index ranges */ if (index >= 0xC0000000) { limit = env->cpuid_xlevel2; @@ -6194,8 +6387,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx |= CPUID_EXT_OSXSAVE; } *edx = env->features[FEAT_1_EDX]; - if (cs->nr_cores * cs->nr_threads > 1) { - *ebx |= (cs->nr_cores * cs->nr_threads) 
<< 16; + if (threads_per_pkg > 1) { + *ebx |= threads_per_pkg << 16; *edx |= CPUID_HT; } if (!cpu->enable_pmu) { @@ -6232,41 +6425,50 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, */ if (*eax & 31) { int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); - int vcpus_per_socket = cs->nr_cores * cs->nr_threads; - if (cs->nr_cores > 1) { + + if (cores_per_pkg > 1) { *eax &= ~0xFC000000; - *eax |= (pow2ceil(cs->nr_cores) - 1) << 26; + *eax |= max_core_ids_in_package(&topo_info) << 26; } - if (host_vcpus_per_cache > vcpus_per_socket) { + if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; - *eax |= (pow2ceil(vcpus_per_socket) - 1) << 14; + + /* Share the cache at package level. */ + *eax |= max_thread_ids_for_cache(&topo_info, + CPU_TOPO_LEVEL_PACKAGE) << 14; } } } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { *eax = *ebx = *ecx = *edx = 0; } else { *eax = 0; + switch (count) { case 0: /* L1 dcache info */ encode_cache_cpuid4(env->cache_info_cpuid4.l1d_cache, - 1, cs->nr_cores, + &topo_info, eax, ebx, ecx, edx); + if (!cpu->l1_cache_per_core) { + *eax &= ~MAKE_64BIT_MASK(14, 12); + } break; case 1: /* L1 icache info */ encode_cache_cpuid4(env->cache_info_cpuid4.l1i_cache, - 1, cs->nr_cores, + &topo_info, eax, ebx, ecx, edx); + if (!cpu->l1_cache_per_core) { + *eax &= ~MAKE_64BIT_MASK(14, 12); + } break; case 2: /* L2 cache info */ encode_cache_cpuid4(env->cache_info_cpuid4.l2_cache, - cs->nr_threads, cs->nr_cores, + &topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - die_offset = apicid_die_offset(&topo_info); if (cpu->enable_l3_cache) { encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - (1 << die_offset), cs->nr_cores, + &topo_info, eax, ebx, ecx, edx); break; } @@ -6369,18 +6571,18 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: *eax = apicid_core_offset(&topo_info); - *ebx = cs->nr_threads; - *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; + *ebx = topo_info.threads_per_core; + *ecx |= CPUID_B_ECX_TOPO_LEVEL_SMT << 8; break; case 1: *eax = apicid_pkg_offset(&topo_info); - *ebx = cs->nr_cores * cs->nr_threads; - *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + *ebx = threads_per_pkg; + *ecx |= CPUID_B_ECX_TOPO_LEVEL_CORE << 8; break; default: *eax = 0; *ebx = 0; - *ecx |= CPUID_TOPOLOGY_LEVEL_INVALID; + *ecx |= CPUID_B_ECX_TOPO_LEVEL_INVALID << 8; } assert(!(*eax & ~0x1f)); @@ -6394,36 +6596,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x1F: /* V2 Extended Topology Enumeration Leaf */ - if (env->nr_dies < 2) { + if (!x86_has_extended_topo(env->avail_cpu_topo)) { *eax = *ebx = *ecx = *edx = 0; break; } - *ecx = count & 0xff; - *edx = cpu->apic_id; - switch (count) { - case 0: - *eax = apicid_core_offset(&topo_info); - *ebx = cs->nr_threads; - *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; - break; - case 1: - *eax = apicid_die_offset(&topo_info); - *ebx = topo_info.cores_per_die * topo_info.threads_per_core; - *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; - break; - case 2: - *eax = apicid_pkg_offset(&topo_info); - *ebx = cs->nr_cores * cs->nr_threads; - *ecx |= CPUID_TOPOLOGY_LEVEL_DIE; - break; - default: - *eax = 0; - *ebx = 0; - *ecx |= CPUID_TOPOLOGY_LEVEL_INVALID; - } - assert(!(*eax & ~0x1f)); - *ebx &= 0xffff; /* The count doesn't need to be reliable. 
*/ + encode_topo_cpuid1f(env, count, &topo_info, eax, ebx, ecx, edx); break; case 0xD: { /* Processor Extended State */ @@ -6642,7 +6820,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, * discards multiple thread information if it is set. * So don't set it here for Intel to make Linux guests happy. */ - if (cs->nr_cores * cs->nr_threads > 1) { + if (threads_per_pkg > 1) { if (env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1 || env->cpuid_vendor2 != CPUID_VENDOR_INTEL_2 || env->cpuid_vendor3 != CPUID_VENDOR_INTEL_3) { @@ -6709,7 +6887,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *eax |= (cpu->guest_phys_bits << 16); } *ebx = env->features[FEAT_8000_0008_EBX]; - if (cs->nr_cores * cs->nr_threads > 1) { + if (threads_per_pkg > 1) { /* * Bits 15:12 is "The number of bits in the initial * Core::X86::Apic::ApicId[ApicId] value that indicate @@ -6717,7 +6895,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, * Bits 7:0 is "The number of threads in the package is NC+1" */ *ecx = (apicid_pkg_offset(&topo_info) << 12) | - ((cs->nr_cores * cs->nr_threads) - 1); + (threads_per_pkg - 1); } else { *ecx = 0; } @@ -7227,7 +7405,7 @@ void x86_cpu_expand_features(X86CPU *cpu, Error **errp) * cpu->vendor_cpuid_only has been unset for compatibility with older * machine types. */ - if ((env->nr_dies > 1) && + if (x86_has_extended_topo(env->avail_cpu_topo) && (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); } @@ -7371,7 +7549,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) /* Use pc-relative instructions in system-mode */ - cs->tcg_cflags |= CF_PCREL; + tcg_cflags_set(cs, CF_PCREL); #endif if (cpu->apic_id == UNASSIGNED_APIC_ID) { @@ -7750,13 +7928,26 @@ static void x86_cpu_post_initfn(Object *obj) accel_cpu_instance_init(CPU(obj)); } +static void x86_cpu_init_default_topo(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + env->nr_modules = 1; + env->nr_dies = 1; + + /* SMT, core and package levels are set by default. */ + set_bit(CPU_TOPO_LEVEL_SMT, env->avail_cpu_topo); + set_bit(CPU_TOPO_LEVEL_CORE, env->avail_cpu_topo); + set_bit(CPU_TOPO_LEVEL_PACKAGE, env->avail_cpu_topo); +} + static void x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); X86CPUClass *xcc = X86_CPU_GET_CLASS(obj); CPUX86State *env = &cpu->env; - env->nr_dies = 1; + x86_cpu_init_default_topo(cpu); object_property_add(obj, "feature-words", "X86CPUFeatureWordInfo", x86_cpu_get_feature_words, @@ -7963,12 +8154,14 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, 0), DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), + DEFINE_PROP_INT32("module-id", X86CPU, module_id, 0), DEFINE_PROP_INT32("die-id", X86CPU, die_id, 0), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), #else DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), + DEFINE_PROP_INT32("module-id", X86CPU, module_id, -1), DEFINE_PROP_INT32("die-id", X86CPU, die_id, -1), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), #endif @@ -8073,6 +8266,7 @@ static Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). 
*/ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), + DEFINE_PROP_BOOL("legacy-multi-node", X86CPU, legacy_multi_node, false), DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), /* @@ -8092,6 +8286,7 @@ static Property x86_cpu_properties[] = { false), DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, true), + DEFINE_PROP_BOOL("x-l1-cache-per-thread", X86CPU, l1_cache_per_core, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6112e27bfd..c64ef0c1a2 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -24,15 +24,13 @@ #include "cpu-qom.h" #include "kvm/hyperv-proto.h" #include "exec/cpu-defs.h" +#include "hw/i386/topology.h" #include "qapi/qapi-types-common.h" #include "qemu/cpu-float.h" #include "qemu/timer.h" #define XEN_NR_VIRQS 24 -/* The x86 has a strong memory model with some store-after-load re-ordering */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) - #define KVM_HAVE_MCE_INJECTION 1 /* support for self modifying code even if the modified instruction is @@ -261,6 +259,7 @@ typedef enum X86Seg { #define CR4_SMAP_MASK (1U << 21) #define CR4_PKE_MASK (1U << 22) #define CR4_PKS_MASK (1U << 24) +#define CR4_LAM_SUP_MASK (1U << 28) #define CR4_RESERVED_MASK \ (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ @@ -269,7 +268,8 @@ typedef enum X86Seg { | CR4_OSFXSR_MASK | CR4_OSXMMEXCPT_MASK | CR4_UMIP_MASK \ | CR4_LA57_MASK \ | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \ - | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK)) + | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \ + | CR4_LAM_SUP_MASK)) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) @@ -819,8 +819,6 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_0_EBX_SMAP (1U << 20) /* AVX-512 Integer Fused Multiply Add */ #define CPUID_7_0_EBX_AVX512IFMA (1U << 21) -/* Persistent Commit */ -#define CPUID_7_0_EBX_PCOMMIT (1U << 22) /* Flush a Cache Line Optimized */ #define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) /* Cache Line Write Back */ @@ -932,6 +930,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EAX_AMX_FP16 (1U << 21) /* Support for VPMADD52[H,L]UQ */ #define CPUID_7_1_EAX_AVX_IFMA (1U << 23) +/* Linear Address Masking */ +#define CPUID_7_1_EAX_LAM (1U << 26) /* Support for VPDPB[SU,UU,SS]D[,S] */ #define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) @@ -1016,10 +1016,16 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */ /* CPUID[0xB].ECX level types */ -#define CPUID_TOPOLOGY_LEVEL_INVALID (0U << 8) -#define CPUID_TOPOLOGY_LEVEL_SMT (1U << 8) -#define CPUID_TOPOLOGY_LEVEL_CORE (2U << 8) -#define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8) +#define CPUID_B_ECX_TOPO_LEVEL_INVALID 0 +#define CPUID_B_ECX_TOPO_LEVEL_SMT 1 +#define CPUID_B_ECX_TOPO_LEVEL_CORE 2 + +/* COUID[0x1F].ECX level types */ +#define CPUID_1F_ECX_TOPO_LEVEL_INVALID CPUID_B_ECX_TOPO_LEVEL_INVALID +#define CPUID_1F_ECX_TOPO_LEVEL_SMT CPUID_B_ECX_TOPO_LEVEL_SMT +#define CPUID_1F_ECX_TOPO_LEVEL_CORE CPUID_B_ECX_TOPO_LEVEL_CORE +#define CPUID_1F_ECX_TOPO_LEVEL_MODULE 3 +#define CPUID_1F_ECX_TOPO_LEVEL_DIE 5 /* MSR Feature Bits */ #define MSR_ARCH_CAP_RDCL_NO (1U << 0) @@ -1419,23 +1425,34 @@ typedef struct { */ #define UNASSIGNED_APIC_ID 0xFFFFFFFF -typedef union X86LegacyXSaveArea { - struct { - uint16_t fcw; - uint16_t fsw; - uint8_t ftw; - uint8_t reserved; - uint16_t fpop; - uint64_t fpip; - uint64_t 
fpdp; - uint32_t mxcsr; - uint32_t mxcsr_mask; - FPReg fpregs[8]; - uint8_t xmm_regs[16][16]; +typedef struct X86LegacyXSaveArea { + uint16_t fcw; + uint16_t fsw; + uint8_t ftw; + uint8_t reserved; + uint16_t fpop; + union { + struct { + uint64_t fpip; + uint64_t fpdp; + }; + struct { + uint32_t fip; + uint32_t fcs; + uint32_t foo; + uint32_t fos; + }; }; - uint8_t data[512]; + uint32_t mxcsr; + uint32_t mxcsr_mask; + FPReg fpregs[8]; + uint8_t xmm_regs[16][16]; + uint32_t hw_reserved[12]; + uint32_t sw_reserved[12]; } X86LegacyXSaveArea; +QEMU_BUILD_BUG_ON(sizeof(X86LegacyXSaveArea) != 512); + typedef struct X86XSaveHeader { uint64_t xstate_bv; uint64_t xcomp_bv; @@ -1583,6 +1600,13 @@ typedef struct CPUCacheInfo { * address bits. CPUID[4].EDX[bit 2]. */ bool complex_indexing; + + /* + * Cache Topology. The level that cache is shared in. + * Used to encode CPUID[4].EAX[bits 25:14] or + * CPUID[0x8000001D].EAX[bits 25:14]. + */ + enum CPUTopoLevel share_level; } CPUCacheInfo; @@ -1892,6 +1916,12 @@ typedef struct CPUArchState { /* Number of dies within this CPU package. */ unsigned nr_dies; + + /* Number of modules within one die. */ + unsigned nr_modules; + + /* Bitmap of available CPU topology levels for this CPU. */ + DECLARE_BITMAP(avail_cpu_topo, CPU_TOPO_LEVEL_MAX); } CPUX86State; struct kvm_msrs; @@ -1993,10 +2023,21 @@ struct ArchCPU { bool enable_l3_cache; /* Compatibility bits for old machine types. + * If true present L1 cache as per-thread, not per-core. + */ + bool l1_cache_per_core; + + /* Compatibility bits for old machine types. * If true present the old cache topology information */ bool legacy_cache; + /* Compatibility bits for old machine types. + * If true decode the CPUID Function 0x8000001E_ECX to support multiple + * nodes per processor + */ + bool legacy_multi_node; + /* Compatibility bits for old machine types: */ bool enable_cpuid_0xb; @@ -2046,6 +2087,7 @@ struct ArchCPU { int32_t node_id; /* NUMA node this CPU belongs to */ int32_t socket_id; int32_t die_id; + int32_t module_id; int32_t core_id; int32_t thread_id; @@ -2224,15 +2266,17 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, /* used for debug or cpu save/restore */ /* cpu-exec.c */ -/* the following helpers are only usable in user mode simulation as - they can trigger unexpected exceptions */ +/* + * The following helpers are only usable in user mode simulation. + * The host pointers should come from lock_user(). 
+ */ void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector); -void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); -void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); -void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); -void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); -void cpu_x86_xsave(CPUX86State *s, target_ulong ptr); -void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr); +void cpu_x86_fsave(CPUX86State *s, void *host, size_t len); +void cpu_x86_frstor(CPUX86State *s, void *host, size_t len); +void cpu_x86_fxsave(CPUX86State *s, void *host, size_t len); +void cpu_x86_fxrstor(CPUX86State *s, void *host, size_t len); +void cpu_x86_xsave(CPUX86State *s, void *host, size_t len, uint64_t rbfm); +bool cpu_x86_xrstor(CPUX86State *s, void *host, size_t len, uint64_t rbfm); /* cpu.c */ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, @@ -2560,6 +2604,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env) if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS)) { reserved_bits |= CR4_PKS_MASK; } + if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) { + reserved_bits |= CR4_LAM_SUP_MASK; + } return reserved_bits; } diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c index ebb000df6a..4acf485879 100644 --- a/target/i386/gdbstub.c +++ b/target/i386/gdbstub.c @@ -19,7 +19,7 @@ */ #include "qemu/osdep.h" #include "cpu.h" -#include "include/gdbstub/helpers.h" +#include "gdbstub/helpers.h" #ifdef TARGET_X86_64 static const int gpr_map[16] = { diff --git a/target/i386/helper.c b/target/i386/helper.c index 23ccb23a5b..f9d1381f90 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -219,6 +219,10 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) new_cr4 &= ~CR4_PKS_MASK; } + if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) { + new_cr4 &= ~CR4_LAM_SUP_MASK; + } + env->cr[4] = new_cr4; env->hflags = hflags; @@ -523,7 +527,7 @@ static inline target_ulong get_memio_eip(CPUX86State *env) } /* Per x86_restore_state_to_opc. 
*/ - if (cs->tcg_cflags & CF_PCREL) { + if (tcg_cflags_has(cs, CF_PCREL)) { return (env->eip & TARGET_PAGE_MASK) | data[0]; } else { return data[0] - env->segs[R_CS].base; diff --git a/target/i386/helper.h b/target/i386/helper.h index ac2b04abd6..a52a1bf0f2 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -22,8 +22,8 @@ DEF_HELPER_FLAGS_5(bndstx32, TCG_CALL_NO_WG, void, env, tl, tl, i64, i64) DEF_HELPER_FLAGS_5(bndstx64, TCG_CALL_NO_WG, void, env, tl, tl, i64, i64) DEF_HELPER_1(bnd_jmp, void, env) -DEF_HELPER_2(aam, void, env, int) -DEF_HELPER_2(aad, void, env, int) +DEF_HELPER_FLAGS_2(aam, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_2(aad, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_1(aaa, void, env) DEF_HELPER_1(aas, void, env) DEF_HELPER_1(daa, void, env) @@ -207,15 +207,4 @@ DEF_HELPER_1(emms, void, env) #define SHIFT 2 #include "tcg/ops_sse_header.h.inc" -DEF_HELPER_3(rclb, tl, env, tl, tl) -DEF_HELPER_3(rclw, tl, env, tl, tl) -DEF_HELPER_3(rcll, tl, env, tl, tl) -DEF_HELPER_3(rcrb, tl, env, tl, tl) -DEF_HELPER_3(rcrw, tl, env, tl, tl) -DEF_HELPER_3(rcrl, tl, env, tl, tl) -#ifdef TARGET_X86_64 -DEF_HELPER_3(rclq, tl, env, tl, tl) -DEF_HELPER_3(rcrq, tl, env, tl, tl) -#endif - DEF_HELPER_1(rdrand, tl, env) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 1ed8ed5154..e493452acb 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -419,9 +419,9 @@ int hvf_vcpu_exec(CPUState *cpu) } do { - if (cpu->vcpu_dirty) { + if (cpu->accel->dirty) { hvf_put_registers(cpu); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } if (hvf_inject_interrupts(cpu)) { diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index be2c46246e..1569f860eb 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -427,7 +427,7 @@ int hvf_process_events(CPUState *cs) X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - if (!cs->vcpu_dirty) { + if (!cs->accel->dirty) { /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); } diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c index f2a3fe650a..b94f12acc2 100644 --- a/target/i386/kvm/hyperv.c +++ b/target/i386/kvm/hyperv.c @@ -81,7 +81,7 @@ int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) */ async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); - return 0; + return EXCP_INTERRUPT; case KVM_EXIT_HYPERV_HCALL: { uint16_t code = exit->u.hcall.input & 0xffff; bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index c5943605ee..6c864e4611 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -51,6 +51,7 @@ #include "hw/i386/apic_internal.h" #include "hw/i386/apic-msidef.h" #include "hw/i386/intel_iommu.h" +#include "hw/i386/topology.h" #include "hw/i386/x86-iommu.h" #include "hw/i386/e820_memory_layout.h" @@ -1791,7 +1792,7 @@ static uint32_t kvm_x86_build_cpuid(CPUX86State *env, break; } case 0x1f: - if (env->nr_dies < 2) { + if (!x86_has_extended_topo(env->avail_cpu_topo)) { cpuid_i--; break; } diff --git a/target/i386/meson.build b/target/i386/meson.build index 8abce725f8..075117989b 100644 --- a/target/i386/meson.build +++ b/target/i386/meson.build @@ -18,6 +18,7 @@ i386_system_ss.add(files( 'arch_memory_mapping.c', 'machine.c', 'monitor.c', + 'cpu-apic.c', 'cpu-sysemu.c', )) i386_system_ss.add(when: 'CONFIG_SEV', if_true: files('sev.c'), if_false: files('sev-sysemu-stub.c')) diff --git a/target/i386/monitor.c 
b/target/i386/monitor.c index 3a281dab02..2d766b2637 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -28,8 +28,6 @@ #include "monitor/hmp-target.h" #include "monitor/hmp.h" #include "qapi/qmp/qdict.h" -#include "sysemu/hw_accel.h" -#include "sysemu/kvm.h" #include "qapi/error.h" #include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" @@ -647,26 +645,3 @@ const MonitorDef *target_monitor_defs(void) { return monitor_defs; } - -void hmp_info_local_apic(Monitor *mon, const QDict *qdict) -{ - CPUState *cs; - - if (qdict_haskey(qdict, "apic-id")) { - int id = qdict_get_try_int(qdict, "apic-id", 0); - - cs = cpu_by_arch_id(id); - if (cs) { - cpu_synchronize_state(cs); - } - } else { - cs = mon_get_cpu(mon); - } - - - if (!cs) { - monitor_printf(mon, "No CPU available\n"); - return; - } - x86_cpu_dump_local_apic_state(cs, CPU_DUMP_FPU); -} diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c index 49a3a3b916..65768aca03 100644 --- a/target/i386/nvmm/nvmm-all.c +++ b/target/i386/nvmm/nvmm-all.c @@ -30,6 +30,7 @@ struct AccelCPUState { struct nvmm_vcpu vcpu; uint8_t tpr; bool stop; + bool dirty; /* Window-exiting for INTs/NMIs. */ bool int_window_exit; @@ -507,7 +508,7 @@ nvmm_io_callback(struct nvmm_io *io) } /* Needed, otherwise infinite loop. */ - current_cpu->vcpu_dirty = false; + current_cpu->accel->dirty = false; } static void @@ -516,7 +517,7 @@ nvmm_mem_callback(struct nvmm_mem *mem) cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); /* Needed, otherwise infinite loop. */ - current_cpu->vcpu_dirty = false; + current_cpu->accel->dirty = false; } static struct nvmm_assist_callbacks nvmm_callbacks = { @@ -726,9 +727,9 @@ nvmm_vcpu_loop(CPUState *cpu) * Inner VCPU loop. */ do { - if (cpu->vcpu_dirty) { + if (cpu->accel->dirty) { nvmm_set_registers(cpu); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } if (qcpu->stop) { @@ -826,32 +827,32 @@ static void do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { nvmm_get_registers(cpu); - cpu->vcpu_dirty = true; + cpu->accel->dirty = true; } static void do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } static void do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { nvmm_set_registers(cpu); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } static void do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->vcpu_dirty = true; + cpu->accel->dirty = true; } void nvmm_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->vcpu_dirty) { + if (!cpu->accel->dirty) { run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -981,7 +982,7 @@ nvmm_init_vcpu(CPUState *cpu) } } - cpu->vcpu_dirty = true; + qcpu->dirty = true; cpu->accel = qcpu; return 0; diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index 6a465a35fd..f0aa1894aa 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -1111,6 +1111,7 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) s1 = s->ZMM_S(0); ret = float32_compare_quiet(s0, s1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } void helper_comiss(CPUX86State *env, Reg *d, Reg *s) @@ -1122,6 +1123,7 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) s1 = s->ZMM_S(0); ret = float32_compare(s0, s1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } void helper_ucomisd(CPUX86State *env, Reg *d, Reg 
*s) @@ -1133,6 +1135,7 @@ void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) d1 = s->ZMM_D(0); ret = float64_compare_quiet(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } void helper_comisd(CPUX86State *env, Reg *d, Reg *s) @@ -1144,6 +1147,7 @@ void helper_comisd(CPUX86State *env, Reg *d, Reg *s) d1 = s->ZMM_D(0); ret = float64_compare(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; + CC_OP = CC_OP_EFLAGS; } #endif @@ -1610,6 +1614,7 @@ void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) cf |= (s->Q(i) & ~d->Q(i)); } CC_SRC = (zf ? 0 : CC_Z) | (cf ? 0 : CC_C); + CC_OP = CC_OP_EFLAGS; } #define FMOVSLDUP(i) s->L((i) & ~1) @@ -1966,6 +1971,7 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, validd--; CC_SRC = (valids < upper ? CC_Z : 0) | (validd < upper ? CC_S : 0); + CC_OP = CC_OP_EFLAGS; switch ((ctrl >> 2) & 3) { case 0: @@ -2297,6 +2303,7 @@ void glue(helper_vtestps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) cf |= (s->L(i) & ~d->L(i)); } CC_SRC = ((zf >> 31) ? 0 : CC_Z) | ((cf >> 31) ? 0 : CC_C); + CC_OP = CC_OP_EFLAGS; } void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) @@ -2309,6 +2316,7 @@ void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) cf |= (s->Q(i) & ~d->Q(i)); } CC_SRC = ((zf >> 63) ? 0 : CC_Z) | ((cf >> 63) ? 0 : CC_C); + CC_OP = CC_OP_EFLAGS; } void glue(helper_vpmaskmovd_st, SUFFIX)(CPUX86State *env, diff --git a/target/i386/tcg/access.c b/target/i386/tcg/access.c new file mode 100644 index 0000000000..56a1181ea5 --- /dev/null +++ b/target/i386/tcg/access.c @@ -0,0 +1,169 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Access guest memory in blocks. */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "access.h" + + +void access_prepare_mmu(X86Access *ret, CPUX86State *env, + vaddr vaddr, unsigned size, + MMUAccessType type, int mmu_idx, uintptr_t ra) +{ + int size1, size2; + void *haddr1, *haddr2; + + assert(size > 0 && size <= TARGET_PAGE_SIZE); + + size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)), + size2 = size - size1; + + memset(ret, 0, sizeof(*ret)); + ret->vaddr = vaddr; + ret->size = size; + ret->size1 = size1; + ret->mmu_idx = mmu_idx; + ret->env = env; + ret->ra = ra; + + haddr1 = probe_access(env, vaddr, size1, type, mmu_idx, ra); + ret->haddr1 = haddr1; + + if (unlikely(size2)) { + haddr2 = probe_access(env, vaddr + size1, size2, type, mmu_idx, ra); + if (haddr2 == haddr1 + size1) { + ret->size1 = size; + } else { +#ifdef CONFIG_USER_ONLY + g_assert_not_reached(); +#else + ret->haddr2 = haddr2; +#endif + } + } +} + +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr, + unsigned size, MMUAccessType type, uintptr_t ra) +{ + int mmu_idx = cpu_mmu_index(env_cpu(env), false); + access_prepare_mmu(ret, env, vaddr, size, type, mmu_idx, ra); +} + +static void *access_ptr(X86Access *ac, vaddr addr, unsigned len) +{ + vaddr offset = addr - ac->vaddr; + + assert(addr >= ac->vaddr); + +#ifdef CONFIG_USER_ONLY + assert(offset <= ac->size1 - len); + return ac->haddr1 + offset; +#else + if (likely(offset <= ac->size1 - len)) { + return ac->haddr1 + offset; + } + assert(offset <= ac->size - len); + /* + * If the address is not naturally aligned, it might span both pages. + * Only return ac->haddr2 if the area is entirely within the second page, + * otherwise fall back to slow accesses. 
+ */ + if (likely(offset >= ac->size1)) { + return ac->haddr2 + (offset - ac->size1); + } + return NULL; +#endif +} + +#ifdef CONFIG_USER_ONLY +# define test_ptr(p) true +#else +# define test_ptr(p) likely(p) +#endif + +uint8_t access_ldb(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint8_t)); + + if (test_ptr(p)) { + return ldub_p(p); + } + return cpu_ldub_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint16_t access_ldw(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint16_t)); + + if (test_ptr(p)) { + return lduw_le_p(p); + } + return cpu_lduw_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint32_t access_ldl(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint32_t)); + + if (test_ptr(p)) { + return ldl_le_p(p); + } + return cpu_ldl_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +uint64_t access_ldq(X86Access *ac, vaddr addr) +{ + void *p = access_ptr(ac, addr, sizeof(uint64_t)); + + if (test_ptr(p)) { + return ldq_le_p(p); + } + return cpu_ldq_le_mmuidx_ra(ac->env, addr, ac->mmu_idx, ac->ra); +} + +void access_stb(X86Access *ac, vaddr addr, uint8_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint8_t)); + + if (test_ptr(p)) { + stb_p(p, val); + } else { + cpu_stb_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stw(X86Access *ac, vaddr addr, uint16_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint16_t)); + + if (test_ptr(p)) { + stw_le_p(p, val); + } else { + cpu_stw_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stl(X86Access *ac, vaddr addr, uint32_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint32_t)); + + if (test_ptr(p)) { + stl_le_p(p, val); + } else { + cpu_stl_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} + +void access_stq(X86Access *ac, vaddr addr, uint64_t val) +{ + void *p = access_ptr(ac, addr, sizeof(uint64_t)); + + if (test_ptr(p)) { + stq_le_p(p, val); + } else { + cpu_stq_le_mmuidx_ra(ac->env, addr, val, ac->mmu_idx, ac->ra); + } +} diff --git a/target/i386/tcg/access.h b/target/i386/tcg/access.h new file mode 100644 index 0000000000..d70808a3a3 --- /dev/null +++ b/target/i386/tcg/access.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* Access guest memory in blocks. */ + +#ifndef X86_TCG_ACCESS_H +#define X86_TCG_ACCESS_H + +/* An access covers at most sizeof(X86XSaveArea), at most 2 pages. */ +typedef struct X86Access { + target_ulong vaddr; + void *haddr1; + void *haddr2; + uint16_t size; + uint16_t size1; + /* + * If we can't access the host page directly, we'll have to do I/O access + * via ld/st helpers. These are internal details, so we store the rest + * to do the access here instead of passing it around in the helpers. 
+ */ + int mmu_idx; + CPUX86State *env; + uintptr_t ra; +} X86Access; + +void access_prepare_mmu(X86Access *ret, CPUX86State *env, + vaddr vaddr, unsigned size, + MMUAccessType type, int mmu_idx, uintptr_t ra); +void access_prepare(X86Access *ret, CPUX86State *env, vaddr vaddr, + unsigned size, MMUAccessType type, uintptr_t ra); + +uint8_t access_ldb(X86Access *ac, vaddr addr); +uint16_t access_ldw(X86Access *ac, vaddr addr); +uint32_t access_ldl(X86Access *ac, vaddr addr); +uint64_t access_ldq(X86Access *ac, vaddr addr); + +void access_stb(X86Access *ac, vaddr addr, uint8_t val); +void access_stw(X86Access *ac, vaddr addr, uint16_t val); +void access_stl(X86Access *ac, vaddr addr, uint32_t val); +void access_stq(X86Access *ac, vaddr addr, uint64_t val); + +#endif diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 426c459412..27dc1bb146 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -33,6 +33,32 @@ * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the * "v" or "z" sizes. The decoder simply makes them separate operand sizes. * + * The manual lists immediate far destinations as Ap (technically an implicit + * argument). The decoder splits them into two immediates, using "Ip" for + * the offset part (that comes first in the instruction stream) and "Iw" for + * the segment/selector part. The size of the offset is given by s->dflag + * and the instructions are illegal in 64-bit mode, so the choice of "Ip" + * is somewhat arbitrary; "Iv" or "Iz" would work just as well. + * + * Operand types + * ------------- + * + * For memory-only operands, if the emitter function wants to rely on + * generic load and writeback, the decoder needs to know the type of the + * operand. Therefore, M is often replaced by the more specific EM and WM + * (respectively selecting an ALU operand, like the operand type E, or a + * vector operand like the operand type W). + * + * Immediates are almost always signed or masked away in helpers. Two + * common exceptions are IN/OUT and absolute jumps. For these, there is + * an additional custom operand type "I_unsigned". Alternatively, the + * mask could be applied (and the original sign-extended value would be + * optimized away by TCG) in the emitter function. + * + * Finally, a "nop" operand type is used for multi-byte NOPs. It accepts + * any value of mod including 11b (unlike M) but it does not try to + * interpret the operand (like M). + * * Vector operands * --------------- * @@ -119,8 +145,12 @@ ## __VA_ARGS__ \ } +#define X86_OP_GROUP1(op, op0, s0, ...) \ + X86_OP_GROUP3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__) #define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \ X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_GROUPw(op, op0, s0, ...) \ + X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__) #define X86_OP_GROUP0(op, ...) \ X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__) @@ -140,16 +170,30 @@ .op3 = X86_TYPE_I, .s3 = X86_SIZE_b, \ ## __VA_ARGS__) +/* + * Short forms that are mostly useful for ALU opcodes and other + * one-byte opcodes. For vector instructions it is usually + * clearer to write all three operands explicitly, because the + * corresponding gen_* function will use OP_PTRn rather than s->T0 + * and s->T1. + */ +#define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...) \ + X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...) 
\ + X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__) #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \ X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) #define X86_OP_ENTRYw(op, op0, s0, ...) \ X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__) #define X86_OP_ENTRYr(op, op0, s0, ...) \ X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__) +#define X86_OP_ENTRY1(op, op0, s0, ...) \ + X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__) #define X86_OP_ENTRY0(op, ...) \ X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__) #define cpuid(feat) .cpuid = X86_FEAT_##feat, +#define noseg .special = X86_SPECIAL_NoSeg, #define xchg .special = X86_SPECIAL_Locked, #define lock .special = X86_SPECIAL_HasLock, #define mmx .special = X86_SPECIAL_MMX, @@ -196,6 +240,8 @@ #define p_66_f3_f2 .valid_prefix = P_66 | P_F3 | P_F2, #define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2, +#define UNKNOWN_OPCODE ((X86OpEntry) {}) + static uint8_t get_modrm(DisasContext *s, CPUX86State *env) { if (!s->has_modrm) { @@ -957,6 +1003,15 @@ static const X86OpEntry opcodes_0F[256] = { /* Incorrectly listed as Mq,Vq in the manual */ [0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66), + [0x40] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x41] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x42] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x43] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x44] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x45] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x46] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x47] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */ [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */ @@ -984,6 +1039,37 @@ static const X86OpEntry opcodes_0F[256] = { [0x76] = X86_OP_ENTRY3(PCMPEQD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), [0x77] = X86_OP_GROUP0(0F77), + [0x80] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x81] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x82] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x83] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x84] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x85] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x86] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x87] = X86_OP_ENTRYr(Jcc, J,z_f64), + + [0x90] = X86_OP_ENTRYw(SETcc, E,b), + [0x91] = X86_OP_ENTRYw(SETcc, E,b), + [0x92] = X86_OP_ENTRYw(SETcc, E,b), + [0x93] = X86_OP_ENTRYw(SETcc, E,b), + [0x94] = X86_OP_ENTRYw(SETcc, E,b), + [0x95] = X86_OP_ENTRYw(SETcc, E,b), + [0x96] = X86_OP_ENTRYw(SETcc, E,b), + [0x97] = X86_OP_ENTRYw(SETcc, E,b), + + [0xa0] = X86_OP_ENTRYr(PUSH, FS, w), + [0xa1] = X86_OP_ENTRYw(POP, FS, w), + + [0x0b] = X86_OP_ENTRY0(UD), /* UD2 */ + [0x0d] = X86_OP_ENTRY1(NOP, M,v), /* 3DNow! 
prefetch */ + + [0x18] = X86_OP_ENTRY1(NOP, nop,v), /* prefetch/reserved NOP */ + [0x19] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ + [0x1c] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ + [0x1d] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ + [0x1e] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ + [0x1f] = X86_OP_ENTRY1(NOP, nop,v), /* NOP/reserved NOP */ + [0x28] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1 p_00_66), /* MOVAPS */ [0x29] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 p_00_66), /* MOVAPS */ [0x2A] = X86_OP_GROUP0(0F2A), @@ -996,6 +1082,15 @@ static const X86OpEntry opcodes_0F[256] = { [0x38] = X86_OP_GROUP0(0F38), [0x3a] = X86_OP_GROUP0(0F3A), + [0x48] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x49] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x4a] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x4b] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x4c] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x4d] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x4e] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x4f] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), + [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), [0x5a] = X86_OP_GROUP0(0F5A), @@ -1021,13 +1116,59 @@ static const X86OpEntry opcodes_0F[256] = { [0x7e] = X86_OP_GROUP0(0F7E), [0x7f] = X86_OP_GROUP0(0F7F), + [0x88] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x89] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x8a] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x8b] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x8c] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x8d] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x8e] = X86_OP_ENTRYr(Jcc, J,z_f64), + [0x8f] = X86_OP_ENTRYr(Jcc, J,z_f64), + + [0x98] = X86_OP_ENTRYw(SETcc, E,b), + [0x99] = X86_OP_ENTRYw(SETcc, E,b), + [0x9a] = X86_OP_ENTRYw(SETcc, E,b), + [0x9b] = X86_OP_ENTRYw(SETcc, E,b), + [0x9c] = X86_OP_ENTRYw(SETcc, E,b), + [0x9d] = X86_OP_ENTRYw(SETcc, E,b), + [0x9e] = X86_OP_ENTRYw(SETcc, E,b), + [0x9f] = X86_OP_ENTRYw(SETcc, E,b), + + [0xa8] = X86_OP_ENTRYr(PUSH, GS, w), + [0xa9] = X86_OP_ENTRYw(POP, GS, w), [0xae] = X86_OP_GROUP0(group15), + /* + * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3 + * to assume sextT0. Multiplication is commutative anyway. 
+ */ + [0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0), + + [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None), + [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None), + [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None), + [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */ + [0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */ + + /* decoded as modrm, which is visible as a difference between page fault and #UD */ + [0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */ + [0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */ + [0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */ [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */ [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66), [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66), [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66), + [0xc8] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xc9] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xca] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xcb] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xcc] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xcd] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xce] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xcf] = X86_OP_ENTRY1(BSWAP, LoBits,y), + [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), @@ -1081,7 +1222,7 @@ static const X86OpEntry opcodes_0F[256] = { [0xfc] = X86_OP_ENTRY3(PADDB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), [0xfd] = X86_OP_ENTRY3(PADDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), [0xfe] = X86_OP_ENTRY3(PADDD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - /* 0xff = UD0 */ + [0xff] = X86_OP_ENTRYr(UD, nop,v), /* UD0 */ }; static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) @@ -1095,8 +1236,405 @@ static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint do_decode_0F(s, env, entry, b); } +static void decode_63(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry arpl = X86_OP_ENTRY2(ARPL, E,w, G,w, chk(prot)); + static const X86OpEntry mov = X86_OP_ENTRY3(MOV, G,v, E,v, None, None); + static const X86OpEntry movsxd = X86_OP_ENTRY3(MOV, G,v, E,d, None, None, sextT0); + if (!CODE64(s)) { + *entry = arpl; + } else if (REX_W(s)) { + *entry = movsxd; + } else { + *entry = mov; + } +} + +static void decode_group1(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86GenFunc group1_gen[8] = { + gen_ADD, gen_OR, gen_ADC, gen_SBB, gen_AND, gen_SUB, gen_XOR, gen_SUB, + }; + int op = (get_modrm(s, env) >> 3) & 7; + entry->gen = group1_gen[op]; + + if (op == 7) { + /* prevent writeback for CMP */ + entry->op1 = entry->op0; + entry->op0 = X86_TYPE_None; + entry->s0 = X86_SIZE_None; + } else { + entry->special = X86_SPECIAL_HasLock; + } +} + +static void decode_group1A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + int op = (get_modrm(s, env) >> 3) & 7; + if (op != 0) { + /* could be XOP prefix too */ + *entry = UNKNOWN_OPCODE; + } else { + entry->gen = gen_POP; + /* The address must use the value of ESP after the pop. 
*/ + s->popl_esp_hack = 1 << mo_pushpop(s, s->dflag); + } +} + +static void decode_group2(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86GenFunc group2_gen[8] = { + gen_ROL, gen_ROR, gen_RCL, gen_RCR, + gen_SHL, gen_SHR, gen_SHL /* SAL, undocumented */, gen_SAR, + }; + int op = (get_modrm(s, env) >> 3) & 7; + entry->gen = group2_gen[op]; + if (op == 7) { + entry->special = X86_SPECIAL_SExtT0; + } else { + entry->special = X86_SPECIAL_ZExtT0; + } +} + +static void decode_group3(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_grp3[16] = { + /* 0xf6 */ + [0x00] = X86_OP_ENTRYrr(AND, E,b, I,b), + [0x02] = X86_OP_ENTRY1(NOT, E,b, lock), + [0x03] = X86_OP_ENTRY1(NEG, E,b, lock), + [0x04] = X86_OP_ENTRYrr(MUL, E,b, 0,b, zextT0), + [0x05] = X86_OP_ENTRYrr(IMUL,E,b, 0,b, sextT0), + [0x06] = X86_OP_ENTRYr(DIV, E,b), + [0x07] = X86_OP_ENTRYr(IDIV, E,b), + + /* 0xf7 */ + [0x08] = X86_OP_ENTRYrr(AND, E,v, I,z), + [0x0a] = X86_OP_ENTRY1(NOT, E,v, lock), + [0x0b] = X86_OP_ENTRY1(NEG, E,v, lock), + [0x0c] = X86_OP_ENTRYrr(MUL, E,v, 0,v, zextT0), + [0x0d] = X86_OP_ENTRYrr(IMUL,E,v, 0,v, sextT0), + [0x0e] = X86_OP_ENTRYr(DIV, E,v), + [0x0f] = X86_OP_ENTRYr(IDIV, E,v), + }; + + int w = (*b & 1); + int reg = (get_modrm(s, env) >> 3) & 7; + + *entry = opcodes_grp3[(w << 3) | reg]; +} + +static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_grp4_5[16] = { + /* 0xfe */ + [0x00] = X86_OP_ENTRY1(INC, E,b, lock), + [0x01] = X86_OP_ENTRY1(DEC, E,b, lock), + + /* 0xff */ + [0x08] = X86_OP_ENTRY1(INC, E,v, lock), + [0x09] = X86_OP_ENTRY1(DEC, E,v, lock), + [0x0a] = X86_OP_ENTRY3(CALL_m, None, None, E,f64, None, None, zextT0), + [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p), + [0x0c] = X86_OP_ENTRY3(JMP_m, None, None, E,f64, None, None, zextT0), + [0x0d] = X86_OP_ENTRYr(JMPF_m, M,p), + [0x0e] = X86_OP_ENTRYr(PUSH, E,f64), + }; + + int w = (*b & 1); + int reg = (get_modrm(s, env) >> 3) & 7; + + *entry = opcodes_grp4_5[(w << 3) | reg]; +} + + +static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + int op = (get_modrm(s, env) >> 3) & 7; + if (op != 0) { + *entry = UNKNOWN_OPCODE; + } else { + entry->gen = gen_MOV; + } +} + static const X86OpEntry opcodes_root[256] = { + [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock), + [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock), + [0x02] = X86_OP_ENTRY2(ADD, G,b, E,b, lock), + [0x03] = X86_OP_ENTRY2(ADD, G,v, E,v, lock), + [0x04] = X86_OP_ENTRY2(ADD, 0,b, I,b, lock), /* AL, Ib */ + [0x05] = X86_OP_ENTRY2(ADD, 0,v, I,z, lock), /* rAX, Iz */ + [0x06] = X86_OP_ENTRYr(PUSH, ES, w, chk(i64)), + [0x07] = X86_OP_ENTRYw(POP, ES, w, chk(i64)), + + [0x10] = X86_OP_ENTRY2(ADC, E,b, G,b, lock), + [0x11] = X86_OP_ENTRY2(ADC, E,v, G,v, lock), + [0x12] = X86_OP_ENTRY2(ADC, G,b, E,b, lock), + [0x13] = X86_OP_ENTRY2(ADC, G,v, E,v, lock), + [0x14] = X86_OP_ENTRY2(ADC, 0,b, I,b, lock), /* AL, Ib */ + [0x15] = X86_OP_ENTRY2(ADC, 0,v, I,z, lock), /* rAX, Iz */ + [0x16] = X86_OP_ENTRYr(PUSH, SS, w, chk(i64)), + [0x17] = X86_OP_ENTRYw(POP, SS, w, chk(i64)), + + [0x20] = X86_OP_ENTRY2(AND, E,b, G,b, lock), + [0x21] = X86_OP_ENTRY2(AND, E,v, G,v, lock), + [0x22] = X86_OP_ENTRY2(AND, G,b, E,b, lock), + [0x23] = X86_OP_ENTRY2(AND, G,v, E,v, lock), + [0x24] = X86_OP_ENTRY2(AND, 0,b, I,b, lock), /* AL, Ib */ + [0x25] = X86_OP_ENTRY2(AND, 0,v, I,z, lock), /* rAX, Iz */ + [0x26] = {}, + [0x27] = X86_OP_ENTRY0(DAA, 
chk(i64)), + + [0x30] = X86_OP_ENTRY2(XOR, E,b, G,b, lock), + [0x31] = X86_OP_ENTRY2(XOR, E,v, G,v, lock), + [0x32] = X86_OP_ENTRY2(XOR, G,b, E,b, lock), + [0x33] = X86_OP_ENTRY2(XOR, G,v, E,v, lock), + [0x34] = X86_OP_ENTRY2(XOR, 0,b, I,b, lock), /* AL, Ib */ + [0x35] = X86_OP_ENTRY2(XOR, 0,v, I,z, lock), /* rAX, Iz */ + [0x36] = {}, + [0x37] = X86_OP_ENTRY0(AAA, chk(i64)), + + [0x40] = X86_OP_ENTRY1(INC, 0,v, chk(i64)), + [0x41] = X86_OP_ENTRY1(INC, 1,v, chk(i64)), + [0x42] = X86_OP_ENTRY1(INC, 2,v, chk(i64)), + [0x43] = X86_OP_ENTRY1(INC, 3,v, chk(i64)), + [0x44] = X86_OP_ENTRY1(INC, 4,v, chk(i64)), + [0x45] = X86_OP_ENTRY1(INC, 5,v, chk(i64)), + [0x46] = X86_OP_ENTRY1(INC, 6,v, chk(i64)), + [0x47] = X86_OP_ENTRY1(INC, 7,v, chk(i64)), + + [0x50] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x51] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x52] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x53] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x54] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x55] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x56] = X86_OP_ENTRYr(PUSH, LoBits,d64), + [0x57] = X86_OP_ENTRYr(PUSH, LoBits,d64), + + [0x60] = X86_OP_ENTRY0(PUSHA, chk(i64)), + [0x61] = X86_OP_ENTRY0(POPA, chk(i64)), + [0x62] = X86_OP_ENTRYrr(BOUND, G,v, M,a, chk(i64)), + [0x63] = X86_OP_GROUP0(63), + [0x64] = {}, + [0x65] = {}, + [0x66] = {}, + [0x67] = {}, + + [0x70] = X86_OP_ENTRYr(Jcc, J,b), + [0x71] = X86_OP_ENTRYr(Jcc, J,b), + [0x72] = X86_OP_ENTRYr(Jcc, J,b), + [0x73] = X86_OP_ENTRYr(Jcc, J,b), + [0x74] = X86_OP_ENTRYr(Jcc, J,b), + [0x75] = X86_OP_ENTRYr(Jcc, J,b), + [0x76] = X86_OP_ENTRYr(Jcc, J,b), + [0x77] = X86_OP_ENTRYr(Jcc, J,b), + + [0x80] = X86_OP_GROUP2(group1, E,b, I,b), + [0x81] = X86_OP_GROUP2(group1, E,v, I,z), + [0x82] = X86_OP_GROUP2(group1, E,b, I,b, chk(i64)), + [0x83] = X86_OP_GROUP2(group1, E,v, I,b), + [0x84] = X86_OP_ENTRYrr(AND, E,b, G,b), + [0x85] = X86_OP_ENTRYrr(AND, E,v, G,v), + [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg), + [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg), + + [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x94] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x95] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x96] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x97] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + + [0xA0] = X86_OP_ENTRY3(MOV, 0,b, O,b, None, None), /* AL, Ob */ + [0xA1] = X86_OP_ENTRY3(MOV, 0,v, O,v, None, None), /* rAX, Ov */ + [0xA2] = X86_OP_ENTRY3(MOV, O,b, 0,b, None, None), /* Ob, AL */ + [0xA3] = X86_OP_ENTRY3(MOV, O,v, 0,v, None, None), /* Ov, rAX */ + [0xA4] = X86_OP_ENTRYrr(MOVS, Y,b, X,b), + [0xA5] = X86_OP_ENTRYrr(MOVS, Y,v, X,v), + [0xA6] = X86_OP_ENTRYrr(CMPS, Y,b, X,b), + [0xA7] = X86_OP_ENTRYrr(CMPS, Y,v, X,v), + + [0xB0] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB1] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB2] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB3] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB4] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB5] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB6] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + [0xB7] = X86_OP_ENTRY3(MOV, LoBits,b, I,b, None, None), + + [0xC0] = X86_OP_GROUP2(group2, E,b, I,b), + [0xC1] = X86_OP_GROUP2(group2, E,v, I,b), + [0xC2] = X86_OP_ENTRYr(RET, I,w), + [0xC3] = X86_OP_ENTRY0(RET), + [0xC4] = X86_OP_ENTRY3(LES, G,z, EM,p, None, None, chk(i64)), + [0xC5] = X86_OP_ENTRY3(LDS, G,z, EM,p, None, None, 
chk(i64)), + [0xC6] = X86_OP_GROUP3(group11, E,b, I,b, None, None), /* reg=000b */ + [0xC7] = X86_OP_GROUP3(group11, E,v, I,z, None, None), /* reg=000b */ + + [0xD0] = X86_OP_GROUP1(group2, E,b), + [0xD1] = X86_OP_GROUP1(group2, E,v), + [0xD2] = X86_OP_GROUP2(group2, E,b, 1,b), /* CL */ + [0xD3] = X86_OP_GROUP2(group2, E,v, 1,b), /* CL */ + [0xD4] = X86_OP_ENTRY2(AAM, 0,w, I,b), + [0xD5] = X86_OP_ENTRY2(AAD, 0,w, I,b), + [0xD6] = X86_OP_ENTRYw(SALC, 0,b), + [0xD7] = X86_OP_ENTRY1(XLAT, 0,b, zextT0), /* AL read/written */ + + [0xE0] = X86_OP_ENTRYr(LOOPNE, J,b), /* implicit: CX with aflag size */ + [0xE1] = X86_OP_ENTRYr(LOOPE, J,b), /* implicit: CX with aflag size */ + [0xE2] = X86_OP_ENTRYr(LOOP, J,b), /* implicit: CX with aflag size */ + [0xE3] = X86_OP_ENTRYr(JCXZ, J,b), /* implicit: CX with aflag size */ + [0xE4] = X86_OP_ENTRYwr(IN, 0,b, I_unsigned,b), /* AL */ + [0xE5] = X86_OP_ENTRYwr(IN, 0,v, I_unsigned,b), /* AX/EAX */ + [0xE6] = X86_OP_ENTRYrr(OUT, 0,b, I_unsigned,b), /* AL */ + [0xE7] = X86_OP_ENTRYrr(OUT, 0,v, I_unsigned,b), /* AX/EAX */ + + [0xF1] = X86_OP_ENTRY0(INT1, svm(ICEBP)), + [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0)), + [0xF5] = X86_OP_ENTRY0(CMC), + [0xF6] = X86_OP_GROUP1(group3, E,b), + [0xF7] = X86_OP_GROUP1(group3, E,v), + + [0x08] = X86_OP_ENTRY2(OR, E,b, G,b, lock), + [0x09] = X86_OP_ENTRY2(OR, E,v, G,v, lock), + [0x0A] = X86_OP_ENTRY2(OR, G,b, E,b, lock), + [0x0B] = X86_OP_ENTRY2(OR, G,v, E,v, lock), + [0x0C] = X86_OP_ENTRY2(OR, 0,b, I,b, lock), /* AL, Ib */ + [0x0D] = X86_OP_ENTRY2(OR, 0,v, I,z, lock), /* rAX, Iz */ + [0x0E] = X86_OP_ENTRYr(PUSH, CS, w, chk(i64)), [0x0F] = X86_OP_GROUP0(0F), + + [0x18] = X86_OP_ENTRY2(SBB, E,b, G,b, lock), + [0x19] = X86_OP_ENTRY2(SBB, E,v, G,v, lock), + [0x1A] = X86_OP_ENTRY2(SBB, G,b, E,b, lock), + [0x1B] = X86_OP_ENTRY2(SBB, G,v, E,v, lock), + [0x1C] = X86_OP_ENTRY2(SBB, 0,b, I,b, lock), /* AL, Ib */ + [0x1D] = X86_OP_ENTRY2(SBB, 0,v, I,z, lock), /* rAX, Iz */ + [0x1E] = X86_OP_ENTRYr(PUSH, DS, w, chk(i64)), + [0x1F] = X86_OP_ENTRYw(POP, DS, w, chk(i64)), + + [0x28] = X86_OP_ENTRY2(SUB, E,b, G,b, lock), + [0x29] = X86_OP_ENTRY2(SUB, E,v, G,v, lock), + [0x2A] = X86_OP_ENTRY2(SUB, G,b, E,b, lock), + [0x2B] = X86_OP_ENTRY2(SUB, G,v, E,v, lock), + [0x2C] = X86_OP_ENTRY2(SUB, 0,b, I,b, lock), /* AL, Ib */ + [0x2D] = X86_OP_ENTRY2(SUB, 0,v, I,z, lock), /* rAX, Iz */ + [0x2E] = {}, + [0x2F] = X86_OP_ENTRY0(DAS, chk(i64)), + + [0x38] = X86_OP_ENTRYrr(SUB, E,b, G,b), + [0x39] = X86_OP_ENTRYrr(SUB, E,v, G,v), + [0x3A] = X86_OP_ENTRYrr(SUB, G,b, E,b), + [0x3B] = X86_OP_ENTRYrr(SUB, G,v, E,v), + [0x3C] = X86_OP_ENTRYrr(SUB, 0,b, I,b), /* AL, Ib */ + [0x3D] = X86_OP_ENTRYrr(SUB, 0,v, I,z), /* rAX, Iz */ + [0x3E] = {}, + [0x3F] = X86_OP_ENTRY0(AAS, chk(i64)), + + [0x48] = X86_OP_ENTRY1(DEC, 0,v, chk(i64)), + [0x49] = X86_OP_ENTRY1(DEC, 1,v, chk(i64)), + [0x4A] = X86_OP_ENTRY1(DEC, 2,v, chk(i64)), + [0x4B] = X86_OP_ENTRY1(DEC, 3,v, chk(i64)), + [0x4C] = X86_OP_ENTRY1(DEC, 4,v, chk(i64)), + [0x4D] = X86_OP_ENTRY1(DEC, 5,v, chk(i64)), + [0x4E] = X86_OP_ENTRY1(DEC, 6,v, chk(i64)), + [0x4F] = X86_OP_ENTRY1(DEC, 7,v, chk(i64)), + + [0x58] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x59] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x5A] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x5B] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x5C] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x5D] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x5E] = X86_OP_ENTRYw(POP, LoBits,d64), + [0x5F] = X86_OP_ENTRYw(POP, LoBits,d64), + + [0x68] = X86_OP_ENTRYr(PUSH, I,z), + [0x69] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 
I,z, sextT0), + [0x6A] = X86_OP_ENTRYr(PUSH, I,b), + [0x6B] = X86_OP_ENTRY3(IMUL3, G,v, E,v, I,b, sextT0), + [0x6C] = X86_OP_ENTRYrr(INS, Y,b, 2,w), /* DX */ + [0x6D] = X86_OP_ENTRYrr(INS, Y,z, 2,w), /* DX */ + [0x6E] = X86_OP_ENTRYrr(OUTS, X,b, 2,w), /* DX */ + [0x6F] = X86_OP_ENTRYrr(OUTS, X,z, 2,w), /* DX */ + + [0x78] = X86_OP_ENTRYr(Jcc, J,b), + [0x79] = X86_OP_ENTRYr(Jcc, J,b), + [0x7A] = X86_OP_ENTRYr(Jcc, J,b), + [0x7B] = X86_OP_ENTRYr(Jcc, J,b), + [0x7C] = X86_OP_ENTRYr(Jcc, J,b), + [0x7D] = X86_OP_ENTRYr(Jcc, J,b), + [0x7E] = X86_OP_ENTRYr(Jcc, J,b), + [0x7F] = X86_OP_ENTRYr(Jcc, J,b), + + [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None), + [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None), + [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None), + [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None), + [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None), + [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg), + [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,v, None, None), + [0x8F] = X86_OP_GROUPw(group1A, E,v), + + [0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */ + [0x99] = X86_OP_ENTRY3(CWD, 2,v, 0,v, None, None), /* rDX, rAX */ + [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)), + [0x9B] = X86_OP_ENTRY0(WAIT), + [0x9C] = X86_OP_ENTRY0(PUSHF, chk(vm86_iopl) svm(PUSHF)), + [0x9D] = X86_OP_ENTRY0(POPF, chk(vm86_iopl) svm(POPF)), + [0x9E] = X86_OP_ENTRY0(SAHF), + [0x9F] = X86_OP_ENTRY0(LAHF), + + [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b), /* AL, Ib */ + [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z), /* rAX, Iz */ + [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None), + [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None), + /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS. */ + [0xAC] = X86_OP_ENTRYr(LODS, X,b), + [0xAD] = X86_OP_ENTRYr(LODS, X,v), + [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b), + [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v), + + [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + + [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b), + [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64), + [0xCA] = X86_OP_ENTRYr(RETF, I,w), + [0xCB] = X86_OP_ENTRY0(RETF), + [0xCC] = X86_OP_ENTRY0(INT3), + [0xCD] = X86_OP_ENTRYr(INT, I,b, chk(vm86_iopl)), + [0xCE] = X86_OP_ENTRY0(INTO), + [0xCF] = X86_OP_ENTRY0(IRET, chk(vm86_iopl) svm(IRET)), + + [0xE8] = X86_OP_ENTRYr(CALL, J,z_f64), + [0xE9] = X86_OP_ENTRYr(JMP, J,z_f64), + [0xEA] = X86_OP_ENTRYrr(JMPF, I_unsigned,p, I_unsigned,w, chk(i64)), + [0xEB] = X86_OP_ENTRYr(JMP, J,b), + [0xEC] = X86_OP_ENTRYwr(IN, 0,b, 2,w), /* AL, DX */ + [0xED] = X86_OP_ENTRYwr(IN, 0,v, 2,w), /* AX/EAX, DX */ + [0xEE] = X86_OP_ENTRYrr(OUT, 0,b, 2,w), /* DX, AL */ + [0xEF] = X86_OP_ENTRYrr(OUT, 0,v, 2,w), /* DX, AX/EAX */ + + [0xF8] = X86_OP_ENTRY0(CLC), + [0xF9] = X86_OP_ENTRY0(STC), + [0xFA] = X86_OP_ENTRY0(CLI, chk(iopl)), + [0xFB] = X86_OP_ENTRY0(STI, chk(iopl)), + [0xFC] = X86_OP_ENTRY0(CLD), + [0xFD] = X86_OP_ENTRY0(STD), + [0xFE] = X86_OP_GROUP1(group4_5, E,b), + [0xFF] = X86_OP_GROUP1(group4_5, E,v), }; #undef mmx @@ -1176,6 +1714,10 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot = s->dflag == MO_16 ? 
MO_16 : MO_32; return true; + case X86_SIZE_z_f64: /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */ + *ot = !CODE64(s) && s->dflag == MO_16 ? MO_16 : MO_32; + return true; + case X86_SIZE_dq: /* SSE/AVX 128-bit */ if (e->special == X86_SPECIAL_MMX && !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { @@ -1315,12 +1857,19 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, case X86_TYPE_WM: /* modrm byte selects an XMM/YMM memory operand */ op->unit = X86_OP_SSE; + goto get_modrm_mem; + + case X86_TYPE_EM: /* modrm byte selects an ALU memory operand */ + op->unit = X86_OP_INT; /* fall through */ case X86_TYPE_M: /* modrm byte selects a memory operand */ + get_modrm_mem: modrm = get_modrm(s, env); if ((modrm >> 6) == 3) { return false; } + /* fall through */ + case X86_TYPE_nop: /* modrm operand decoded but not fetched */ get_modrm: decode_modrm(s, env, decode, op, type); break; @@ -1353,7 +1902,12 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, case X86_TYPE_I: /* Immediate */ case X86_TYPE_J: /* Relative offset for a jump */ op->unit = X86_OP_IMM; - decode->immediate = insn_get_signed(env, s, op->ot); + decode->immediate = op->imm = insn_get_signed(env, s, op->ot); + break; + + case X86_TYPE_I_unsigned: /* Immediate */ + op->unit = X86_OP_IMM; + decode->immediate = op->imm = insn_get(env, s, op->ot); break; case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bit register */ @@ -1476,6 +2030,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) switch (cpuid) { case X86_FEAT_None: return true; + case X86_FEAT_CMOV: + return (s->cpuid_features & CPUID_CMOV); case X86_FEAT_F16C: return (s->cpuid_ext_features & CPUID_EXT_F16C); case X86_FEAT_FMA: @@ -1681,22 +2237,31 @@ illegal: * Convert one instruction. s->base.is_jmp is set if the translation must * be stopped. */ -static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) +static void disas_insn(DisasContext *s, CPUState *cpu) { CPUX86State *env = cpu_env(cpu); - bool first = true; X86DecodedInsn decode; X86DecodeFunc decode_func = decode_root; - uint8_t cc_live; + uint8_t cc_live, b; + s->pc = s->base.pc_next; + s->override = -1; + s->popl_esp_hack = 0; +#ifdef TARGET_X86_64 + s->rex_r = 0; + s->rex_x = 0; + s->rex_b = 0; +#endif + s->rip_offset = 0; /* for relative ip address */ + s->vex_l = 0; + s->vex_v = 0; + s->vex_w = false; s->has_modrm = false; + s->prefix = 0; next_byte: - if (first) { - first = false; - } else { - b = x86_ldub_code(env, s); - } + b = x86_ldub_code(env, s); + /* Collect prefixes. */ switch (b) { case 0xf3: @@ -1808,10 +2373,6 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } break; default: - if (b >= 0x100) { - b -= 0x100; - decode_func = do_decode_0F; - } break; } @@ -1840,6 +2401,40 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) } } + /* Go back to old decoder for unconverted opcodes. */ + if (!(s->prefix & PREFIX_VEX)) { + if ((b & ~7) == 0xd8) { + if (!disas_insn_x87(s, cpu, b)) { + goto unknown_op; + } + return; + } + + if (b == 0x0f) { + b = x86_ldub_code(env, s); + switch (b) { + case 0x00 ... 0x03: /* mostly privileged instructions */ + case 0x05 ... 0x09: + case 0x1a ... 0x1b: /* MPX */ + case 0x20 ... 0x23: /* mov from/to CR and DR */ + case 0x30 ... 0x35: /* more privileged instructions */ + case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */ + case 0xaa ... 0xae: /* RSM, SHRD, grp15 */ + case 0xb0 ... 
0xb1: /* cmpxchg */ + case 0xb3: /* btr */ + case 0xb8: /* integer ops */ + case 0xba ... 0xbd: /* integer ops */ + case 0xc0 ... 0xc1: /* xadd */ + case 0xc7: /* grp9 */ + disas_insn_old(s, cpu, b + 0x100); + return; + default: + decode_func = do_decode_0F; + break; + } + } + } + memset(&decode, 0, sizeof(decode)); decode.cc_op = -1; decode.b = b; @@ -1914,6 +2509,11 @@ static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) assert(decode.op[1].unit == X86_OP_INT); break; + case X86_SPECIAL_NoSeg: + decode.mem.def_seg = -1; + s->override = -1; + break; + default: break; } diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 15e6bfef4b..51ef0e621b 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -47,7 +47,10 @@ typedef enum X86OpType { X86_TYPE_Y, /* string destination */ /* Custom */ + X86_TYPE_EM, /* modrm byte selects an ALU memory operand */ X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */ + X86_TYPE_I_unsigned, /* Immediate, zero-extended */ + X86_TYPE_nop, /* modrm operand decoded but not loaded into s->T{0,1} */ X86_TYPE_2op, /* 2-operand RMW instruction */ X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */ X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */ @@ -88,6 +91,7 @@ typedef enum X86OpSize { X86_SIZE_x, /* 128/256-bit, based on operand size */ X86_SIZE_y, /* 32/64-bit, based on operand size */ X86_SIZE_z, /* 16-bit for 16-bit operand size, else 32-bit */ + X86_SIZE_z_f64, /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */ /* Custom */ X86_SIZE_d64, @@ -104,6 +108,7 @@ typedef enum X86CPUIDFeature { X86_FEAT_AVX2, X86_FEAT_BMI1, X86_FEAT_BMI2, + X86_FEAT_CMOV, X86_FEAT_CMPCCXADD, X86_FEAT_F16C, X86_FEAT_FMA, @@ -165,6 +170,8 @@ typedef enum X86InsnSpecial { /* Always locked if it has a memory operand (XCHG) */ X86_SPECIAL_Locked, + /* Do not apply segment base to effective address */ + X86_SPECIAL_NoSeg, /* * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits * (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA @@ -271,16 +278,23 @@ typedef struct X86DecodedOp { bool has_ea; int offset; /* For MMX and SSE */ - /* - * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR, - * do not access directly! - */ - TCGv_ptr v_ptr; + union { + target_ulong imm; + /* + * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR, + * do not access directly! + */ + TCGv_ptr v_ptr; + }; } X86DecodedOp; struct X86DecodedInsn { X86OpEntry e; X86DecodedOp op[3]; + /* + * Rightmost immediate, for convenience since most instructions have + * one (and also for 4-operand instructions). + */ target_ulong immediate; AddressParts mem; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 6bcf88ecd7..e990141454 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -19,6 +19,21 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ +/* + * Sometimes, knowing what the backend has can produce better code. + * The exact opcode to check depends on 32- vs. 64-bit. 
+ */ +#ifdef TARGET_X86_64 +#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64 +#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i64_valid +#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i64_valid +#else +#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32 +#define TCG_TARGET_deposit_tl_valid TCG_TARGET_deposit_i32_valid +#define TCG_TARGET_extract_tl_valid TCG_TARGET_extract_i32_valid +#endif + + #define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg]) typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); @@ -45,6 +60,9 @@ typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even, TCGv_i32 odd); +static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode); +static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode); + static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) { return tcg_constant_i32(val); @@ -58,7 +76,7 @@ static void gen_NM_exception(DisasContext *s) static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) { TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); - gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); + gen_lea_v_seg(s, ea, mem->def_seg, s->override); } static inline int mmx_offset(MemOp ot) @@ -259,7 +277,7 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) } break; case X86_OP_IMM: - tcg_gen_movi_tl(v, decode->immediate); + tcg_gen_movi_tl(v, op->imm); break; case X86_OP_MMX: @@ -283,6 +301,8 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn) { X86DecodedOp *op = &decode->op[opn]; + + assert(op->unit == X86_OP_MMX || op->unit == X86_OP_SSE); if (op->v_ptr) { return op->v_ptr; } @@ -304,8 +324,8 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv case X86_OP_SKIP: break; case X86_OP_SEG: - /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF. */ - gen_movl_seg_T0(s, op->n); + /* Note that gen_movl_seg takes care of interrupt shadow and TF. 
*/ + gen_movl_seg(s, op->n, s->T0); break; case X86_OP_INT: if (op->has_ea) { @@ -328,6 +348,7 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv default: g_assert_not_reached(); } + op->unit = X86_OP_SKIP; } static inline int vector_len(DisasContext *s, X86DecodedInsn *decode) @@ -352,6 +373,20 @@ static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) decode->cc_op = op; } +static void prepare_update_cc_incdec(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + gen_compute_eflags_c(s, s->T1); + prepare_update2_cc(decode, s, op); +} + +static void prepare_update3_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op, TCGv reg) +{ + decode->cc_src2 = reg; + decode->cc_src = s->T1; + decode->cc_dst = s->T0; + decode->cc_op = op; +} + static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs) { MemOp ot = decode->op[0].ot; @@ -883,7 +918,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod } else { \ gen_helper_##lname##_ymm(tcg_env, OP_PTR1, OP_PTR2); \ } \ - set_cc_op(s, CC_OP_EFLAGS); \ + assume_cc_op(s, CC_OP_EFLAGS); \ } UNARY_CMP_SSE(VPTEST, ptest) UNARY_CMP_SSE(VTESTPS, vtestps) @@ -1040,6 +1075,53 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod VSIB_AVX(VPGATHERD, vpgatherd) VSIB_AVX(VPGATHERQ, vpgatherq) +static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_helper_aaa(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_aad(s->T0, s->T0, s->T1); + prepare_update1_cc(decode, s, CC_OP_LOGICB); +} + +static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (decode->immediate == 0) { + gen_exception(s, EXCP00_DIVZ); + } else { + gen_helper_aam(s->T0, s->T0, s->T1); + prepare_update1_cc(decode, s, CC_OP_LOGICB); + } +} + +static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_helper_aas(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + TCGv c_in = tcg_temp_new(); + + gen_compute_eflags_c(s, c_in); + if (s->prefix & PREFIX_LOCK) { + tcg_gen_add_tl(s->T0, c_in, s->T1); + tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(s->T0, s->T0, s->T1); + tcg_gen_add_tl(s->T0, s->T0, c_in); + } + prepare_update3_cc(decode, s, CC_OP_ADCB + ot, c_in); +} + /* ADCX/ADOX do not have memory operands and can use set_cc_op. 
*/ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) { @@ -1093,11 +1175,37 @@ static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX); } +static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(s->T0, s->T0, s->T1); + } + prepare_update2_cc(decode, s, CC_OP_ADDB + ot); +} + static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX); } +static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_and_fetch_tl(s->T0, s->A0, s->T1, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_and_tl(s->T0, s->T0, s->T1); + } + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); +} + static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1106,6 +1214,27 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } +static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv zf = tcg_temp_new(); + TCGv flags = tcg_temp_new(); + + gen_mov_eflags(s, flags); + + /* Compute adjusted DST in T1, merging in SRC[RPL]. */ + tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 0, 2); + + /* Z flag set if DST[RPL] < SRC[RPL] */ + tcg_gen_setcond_tl(TCG_COND_LTU, zf, s->T0, s->T1); + tcg_gen_deposit_tl(flags, flags, zf, ctz32(CC_Z), 1); + + /* Place maximum RPL in DST */ + tcg_gen_umax_tl(s->T0, s->T0, s->T1); + + decode->cc_src = flags; + decode->cc_op = CC_OP_EFLAGS; +} + static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1170,6 +1299,28 @@ static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) set_cc_op(s, CC_OP_BMILGB + ot); } +static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 op = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(op, s->T0); + if (decode->op[1].ot == MO_16) { + gen_helper_boundw(tcg_env, s->A0, op); + } else { + gen_helper_boundl(tcg_env, s->A0, op); + } +} + +static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ +#ifdef TARGET_X86_64 + if (s->dflag == MO_64) { + tcg_gen_bswap64_i64(s->T0, s->T0); + return; + } +#endif + tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ); +} + static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1190,6 +1341,67 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update2_cc(decode, s, CC_OP_BMILGB + ot); } +static void gen_CALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_push_v(s, eip_next_tl(s)); + gen_JMP(s, env, decode); +} + +static void gen_CALL_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_push_v(s, eip_next_tl(s)); + gen_JMP_m(s, env, decode); +} + +static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_far_call(s); +} + +static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + + gen_op_ld_v(s, ot, s->T0, s->A0); + gen_add_A0_im(s, 1 << ot); + 
gen_op_ld_v(s, MO_16, s->T1, s->A0); + gen_far_call(s); +} + +static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp src_ot = decode->op[0].ot - 1; + + tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN); +} + +static void gen_CLC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_compute_eflags(s); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); +} + +static void gen_CLD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, offsetof(CPUX86State, df)); +} + +static void gen_CLI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_reset_eflags(s, IF_MASK); +} + +static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_compute_eflags(s); + tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); +} + +static void gen_CMOVcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_cmovcc1(s, decode->b & 0xf, s->T0, s->T1); +} + static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGLabel *label_top = gen_new_label(); @@ -1209,7 +1421,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec [JCC_Z] = TCG_COND_EQ, [JCC_BE] = TCG_COND_LEU, [JCC_S] = TCG_COND_LT, /* test sign bit by comparing against 0 */ - [JCC_P] = TCG_COND_EQ, /* even parity - tests low bit of popcount */ + [JCC_P] = TCG_COND_TSTEQ, /* even parity - tests low bit of popcount */ [JCC_L] = TCG_COND_LT, [JCC_LE] = TCG_COND_LE, }; @@ -1260,8 +1472,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec case JCC_P: tcg_gen_ext8u_tl(s->tmp0, s->T0); tcg_gen_ctpop_tl(s->tmp0, s->tmp0); - tcg_gen_andi_tl(s->tmp0, s->tmp0, 1); - cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(1); break; case JCC_S: @@ -1294,6 +1505,16 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec decode->cc_op = CC_OP_SUBB + ot; } +static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_nz(s, ot, gen_cmps); + } else { + gen_cmps(s, ot); + } +} + static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[2].ot; @@ -1332,11 +1553,74 @@ static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec } } +static void gen_CWD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int shift = 8 << decode->op[0].ot; + + tcg_gen_sextract_tl(s->T0, s->T0, shift - 1, 1); +} + +static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_helper_daa(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_helper_das(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + tcg_gen_movi_tl(s->T1, -1); + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(s->T0, s->T0, s->T1); + } + prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot); +} + +static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + + switch(ot) { + case MO_8: + gen_helper_divb_AL(tcg_env, s->T1); + break; + 
case MO_16: + gen_helper_divw_AX(tcg_env, s->T1); + break; + default: + case MO_32: + gen_helper_divl_EAX(tcg_env, s->T1); + break; +#ifdef TARGET_X86_64 + case MO_64: + gen_helper_divq_EAX(tcg_env, s->T1); + break; +#endif + } +} + static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_helper_emms(tcg_env); } +static void gen_ENTER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_enter(s, decode->op[1].imm, decode->op[2].imm); +} + static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 length = tcg_constant_i32(decode->immediate & 63); @@ -1350,6 +1634,210 @@ static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2); } +static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ +#ifdef CONFIG_SYSTEM_ONLY + gen_update_cc_op(s); + gen_update_eip_cur(s); + gen_helper_hlt(tcg_env, cur_insn_len_i32(s)); + s->base.is_jmp = DISAS_NORETURN; +#endif +} + +static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + + switch(ot) { + case MO_8: + gen_helper_idivb_AL(tcg_env, s->T1); + break; + case MO_16: + gen_helper_idivw_AX(tcg_env, s->T1); + break; + default: + case MO_32: + gen_helper_idivl_EAX(tcg_env, s->T1); + break; +#ifdef TARGET_X86_64 + case MO_64: + gen_helper_idivq_EAX(tcg_env, s->T1); + break; +#endif + } +} + +static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv cc_src_rhs; + + switch (ot) { + case MO_16: + /* s->T0 already sign-extended */ + tcg_gen_ext16s_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + /* Compare the full result to the extension of the truncated result. */ + tcg_gen_ext16s_tl(s->T1, s->T0); + cc_src_rhs = s->T0; + break; + + case MO_32: +#ifdef TARGET_X86_64 + if (TCG_TARGET_REG_BITS == 64) { + /* + * This produces fewer TCG ops, and better code if flags are needed, + * but it requires a 64-bit multiply even if they are not. Use it + * only if the target has 64-bits registers. + * + * s->T0 is already sign-extended. + */ + tcg_gen_ext32s_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + /* Compare the full result to the extension of the truncated result. */ + tcg_gen_ext32s_tl(s->T1, s->T0); + cc_src_rhs = s->T0; + } else { + /* Variant that only needs a 32-bit widening multiply. 
*/ + TCGv_i32 hi = tcg_temp_new_i32(); + TCGv_i32 lo = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(lo, s->T0); + tcg_gen_trunc_tl_i32(hi, s->T1); + tcg_gen_muls2_i32(lo, hi, lo, hi); + tcg_gen_extu_i32_tl(s->T0, lo); + + cc_src_rhs = tcg_temp_new(); + tcg_gen_extu_i32_tl(cc_src_rhs, hi); + /* Compare the high part to the sign bit of the truncated result */ + tcg_gen_sari_i32(lo, lo, 31); + tcg_gen_extu_i32_tl(s->T1, lo); + } + break; + + case MO_64: +#endif + cc_src_rhs = tcg_temp_new(); + tcg_gen_muls2_tl(s->T0, cc_src_rhs, s->T0, s->T1); + /* Compare the high part to the sign bit of the truncated result */ + tcg_gen_sari_tl(s->T1, s->T0, TARGET_LONG_BITS - 1); + break; + + default: + g_assert_not_reached(); + } + + tcg_gen_sub_tl(s->T1, s->T1, cc_src_rhs); + prepare_update2_cc(decode, s, CC_OP_MULB + ot); +} + +static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + TCGv cc_src_rhs; + + switch (ot) { + case MO_8: + /* s->T0 already sign-extended */ + tcg_gen_ext8s_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + /* Compare the full result to the extension of the truncated result. */ + tcg_gen_ext8s_tl(s->T1, s->T0); + cc_src_rhs = s->T0; + break; + + case MO_16: + /* s->T0 already sign-extended */ + tcg_gen_ext16s_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_shri_tl(s->T1, s->T0, 16); + gen_op_mov_reg_v(s, MO_16, R_EDX, s->T1); + /* Compare the full result to the extension of the truncated result. */ + tcg_gen_ext16s_tl(s->T1, s->T0); + cc_src_rhs = s->T0; + break; + + case MO_32: +#ifdef TARGET_X86_64 + /* s->T0 already sign-extended */ + tcg_gen_ext32s_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + tcg_gen_ext32u_tl(cpu_regs[R_EAX], s->T0); + tcg_gen_shri_tl(cpu_regs[R_EDX], s->T0, 32); + /* Compare the full result to the extension of the truncated result. 
*/ + tcg_gen_ext32s_tl(s->T1, s->T0); + cc_src_rhs = s->T0; + break; + + case MO_64: +#endif + tcg_gen_muls2_tl(s->T0, cpu_regs[R_EDX], s->T0, s->T1); + tcg_gen_mov_tl(cpu_regs[R_EAX], s->T0); + + /* Compare the high part to the sign bit of the truncated result */ + tcg_gen_negsetcondi_tl(TCG_COND_LT, s->T1, s->T0, 0); + cc_src_rhs = cpu_regs[R_EDX]; + break; + + default: + g_assert_not_reached(); + } + + tcg_gen_sub_tl(s->T1, s->T1, cc_src_rhs); + prepare_update2_cc(decode, s, CC_OP_MULB + ot); +} + +static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv_i32 port = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(port, s->T1); + tcg_gen_ext16u_i32(port, port); + if (!gen_check_io(s, ot, port, SVM_IOIO_TYPE_MASK)) { + return; + } + translator_io_start(&s->base); + gen_helper_in_func(ot, s->T0, port); + gen_writeback(s, decode, 0, s->T0); + gen_bpt_io(s, port, ot); +} + +static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + tcg_gen_movi_tl(s->T1, 1); + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T1, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_add_tl(s->T0, s->T0, s->T1); + } + prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot); +} + +static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + TCGv_i32 port = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(port, s->T1); + tcg_gen_ext16u_i32(port, port); + if (!gen_check_io(s, ot, port, + SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) { + return; + } + + translator_io_start(&s->base); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz(s, ot, gen_ins); + } else { + gen_ins(s, ot); + } +} + static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 length = tcg_constant_i32(decode->immediate & 63); @@ -1363,15 +1851,200 @@ static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2); } +static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_interrupt(s, decode->immediate); +} + +static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_exception(s, EXCP01_DB); +} + +static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_interrupt(s, EXCP03_INT3); +} + +static void gen_INTO(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_update_eip_cur(s); + gen_helper_into(tcg_env, cur_insn_len_i32(s)); +} + +static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (!PE(s) || VM86(s)) { + gen_helper_iret_real(tcg_env, tcg_constant_i32(s->dflag - 1)); + } else { + gen_helper_iret_protected(tcg_env, tcg_constant_i32(s->dflag - 1), + eip_next_i32(s)); + } + assume_cc_op(s, CC_OP_EFLAGS); + s->base.is_jmp = DISAS_EOB_ONLY; +} + +static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_bnd_jmp(s); + gen_jcc(s, decode->b & 0xf, decode->immediate); +} + +static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *taken = gen_new_label(); + + gen_update_cc_op(s); + gen_op_jz_ecx(s, taken); + gen_conditional_jump_labels(s, decode->immediate, NULL, taken); +} + +static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_jmp_rel(s, s->dflag, decode->immediate, 0); +} 
+ +static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_op_jmp_v(s, s->T0); + gen_bnd_jmp(s); + s->base.is_jmp = DISAS_JUMP; +} + +static void gen_JMPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_far_jmp(s); +} + +static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + + gen_op_ld_v(s, ot, s->T0, s->A0); + gen_add_A0_im(s, 1 << ot); + gen_op_ld_v(s, MO_16, s->T1, s->A0); + gen_far_jmp(s); +} + +static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) { + return gen_illegal_opcode(s); + } + gen_compute_eflags(s); + /* Note: gen_compute_eflags() only gives the condition codes */ + tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02); + tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8); +} + static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1); gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); } +static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, int seg) +{ + MemOp ot = decode->op[0].ot; + + /* Offset already in s->T0. */ + gen_add_A0_im(s, 1 << ot); + gen_op_ld_v(s, MO_16, s->T1, s->A0); + + /* load the segment here to handle exceptions properly */ + gen_movl_seg(s, seg, s->T1); +} + +static void gen_LDS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_lxx_seg(s, env, decode, R_DS); +} + +static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_mov_tl(s->T0, s->A0); +} + +static void gen_LEAVE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_leave(s); +} + +static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_lxx_seg(s, env, decode, R_ES); +} + +static void gen_LFS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_lxx_seg(s, env, decode, R_FS); +} + +static void gen_LGS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_lxx_seg(s, env, decode, R_GS); +} + +static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz(s, ot, gen_lods); + } else { + gen_lods(s, ot); + } +} + +static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *taken = gen_new_label(); + + gen_update_cc_op(s); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_op_jnz_ecx(s, taken); + gen_conditional_jump_labels(s, decode->immediate, NULL, taken); +} + +static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *taken = gen_new_label(); + TCGLabel *not_taken = gen_new_label(); + + gen_update_cc_op(s); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_op_jz_ecx(s, not_taken); + gen_jcc1(s, (JCC_Z << 1), taken); /* jz taken */ + gen_conditional_jump_labels(s, decode->immediate, not_taken, taken); +} + +static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *taken = gen_new_label(); + TCGLabel *not_taken = gen_new_label(); + + gen_update_cc_op(s); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_op_jz_ecx(s, not_taken); + gen_jcc1(s, (JCC_Z << 1) | 1, taken); /* jnz taken */ + gen_conditional_jump_labels(s, decode->immediate, not_taken, taken); +} + +static void gen_LSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_lxx_seg(s, 
env, decode, R_SS); +} + +static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + /* nothing to do! */ +} +#define gen_NOP gen_MOV + static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override); + gen_lea_v_seg(s, cpu_regs[R_EDI], R_DS, s->override); if (s->prefix & PREFIX_DATA) { gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0); @@ -1476,6 +2149,67 @@ static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod return gen_MOVQ(s, env, decode); } +static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz(s, ot, gen_movs); + } else { + gen_movs(s, ot); + } +} + +static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + switch (ot) { + case MO_8: + /* s->T0 already zero-extended */ + tcg_gen_ext8u_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_andi_tl(s->T1, s->T0, 0xff00); + decode->cc_dst = s->T0; + decode->cc_src = s->T1; + break; + + case MO_16: + /* s->T0 already zero-extended */ + tcg_gen_ext16u_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + tcg_gen_shri_tl(s->T1, s->T0, 16); + gen_op_mov_reg_v(s, MO_16, R_EDX, s->T1); + decode->cc_dst = s->T0; + decode->cc_src = s->T1; + break; + + case MO_32: +#ifdef TARGET_X86_64 + /* s->T0 already zero-extended */ + tcg_gen_ext32u_tl(s->T1, s->T1); + tcg_gen_mul_tl(s->T0, s->T0, s->T1); + tcg_gen_ext32u_tl(cpu_regs[R_EAX], s->T0); + tcg_gen_shri_tl(cpu_regs[R_EDX], s->T0, 32); + decode->cc_dst = cpu_regs[R_EAX]; + decode->cc_src = cpu_regs[R_EDX]; + break; + + case MO_64: +#endif + tcg_gen_mulu2_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->T0, s->T1); + decode->cc_dst = cpu_regs[R_EAX]; + decode->cc_src = cpu_regs[R_EDX]; + break; + + default: + g_assert_not_reached(); + } + + decode->cc_op = CC_OP_MULB + ot; +} + static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1502,6 +2236,95 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) } } +static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv oldv = tcg_temp_new(); + + if (s->prefix & PREFIX_LOCK) { + TCGv newv = tcg_temp_new(); + TCGv cmpv = tcg_temp_new(); + TCGLabel *label1 = gen_new_label(); + + gen_set_label(label1); + gen_op_ld_v(s, ot, oldv, s->A0); + tcg_gen_neg_tl(newv, oldv); + tcg_gen_atomic_cmpxchg_tl(cmpv, s->A0, oldv, newv, + s->mem_index, ot | MO_LE); + tcg_gen_brcond_tl(TCG_COND_NE, oldv, cmpv, label1); + } else { + tcg_gen_mov_tl(oldv, s->T0); + } + tcg_gen_neg_tl(s->T0, oldv); + + decode->cc_dst = s->T0; + decode->cc_src = oldv; + tcg_gen_movi_tl(s->cc_srcT, 0); + decode->cc_op = CC_OP_SUBB + ot; +} + +static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_movi_tl(s->T0, ~0); + tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_not_tl(s->T0, s->T0); + } +} + +static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_or_fetch_tl(s->T0, s->A0, 
s->T1, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_or_tl(s->T0, s->T0, s->T1); + } + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); +} + +static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + TCGv_i32 port = tcg_temp_new_i32(); + TCGv_i32 value = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(port, s->T1); + tcg_gen_ext16u_i32(port, port); + if (!gen_check_io(s, ot, port, 0)) { + return; + } + tcg_gen_trunc_tl_i32(value, s->T0); + translator_io_start(&s->base); + gen_helper_out_func(ot, port, value); + gen_bpt_io(s, port, ot); +} + +static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + TCGv_i32 port = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(port, s->T1); + tcg_gen_ext16u_i32(port, port); + if (!gen_check_io(s, ot, port, SVM_IOIO_STR_MASK)) { + return; + } + + translator_io_start(&s->base); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz(s, ot, gen_outs); + } else { + gen_outs(s, ot); + } +} + static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -1528,14 +2351,14 @@ static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); if ((s->prefix & PREFIX_VEX) && !s->vex_l) { tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), 16, 16, 0); @@ -1546,14 +2369,14 @@ static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); if ((s->prefix & PREFIX_VEX) && !s->vex_l) { tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), 16, 16, 0); @@ -1695,12 +2518,6 @@ static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s) tcg_gen_or_vec(vece, d, d, t); } -#ifdef TARGET_X86_64 -#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64 -#else -#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32 -#endif - static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; @@ -1745,6 +2562,45 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco } } +static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = gen_pop_T0(s); + if (decode->op[0].has_ea) { + /* NOTE: order is important for MMU exceptions */ + gen_op_st_v(s, ot, s->T0, s->A0); + decode->op[0].unit = X86_OP_SKIP; + } + /* NOTE: writing back registers after update is important for pop %sp */ + gen_pop_update(s, ot); +} + +static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + 
gen_popa(s); +} + +static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot; + int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK; + + if (CPL(s) == 0) { + mask |= IF_MASK | IOPL_MASK; + } else if (CPL(s) <= IOPL(s)) { + mask |= IF_MASK; + } + if (s->dflag == MO_16) { + mask &= 0xffff; + } + + ot = gen_pop_T0(s); + gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask)); + gen_pop_update(s, ot); + set_cc_op(s, CC_OP_EFLAGS); + /* abort translation because TF/AC flag may change */ + s->base.is_jmp = DISAS_EOB_NEXT; +} + static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -1891,6 +2747,457 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco } } +static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_push_v(s, s->T1); +} + +static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pusha(s); +} + +static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_helper_read_eflags(s->T0, tcg_env); + gen_push_v(s, s->T0); +} + +static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode, + bool *can_be_zero, TCGv *count) +{ + MemOp ot = decode->op[0].ot; + int mask = (ot <= MO_32 ? 0x1f : 0x3f); + + *can_be_zero = false; + switch (decode->op[2].unit) { + case X86_OP_INT: + *count = tcg_temp_new(); + tcg_gen_andi_tl(*count, s->T1, mask); + *can_be_zero = true; + break; + + case X86_OP_IMM: + if ((decode->immediate & mask) == 0) { + *count = NULL; + break; + } + *count = tcg_temp_new(); + tcg_gen_movi_tl(*count, decode->immediate & mask); + break; + + case X86_OP_SKIP: + *count = tcg_temp_new(); + tcg_gen_movi_tl(*count, 1); + break; + + default: + g_assert_not_reached(); + } + + return ot; +} + +/* + * Compute existing flags in decode->cc_src, for gen_* functions that wants + * to set the cc_op set to CC_OP_ADCOX. In particular, this allows rotate + * operations to compute the carry in decode->cc_dst and the overflow in + * decode->cc_src2. + * + * If need_flags is true, decode->cc_dst and decode->cc_src2 are preloaded + * with the value of CF and OF before the instruction, so that it is possible + * to keep the flags unmodified. + * + * Return true if carry could be made available cheaply as a 1-bit value in + * decode->cc_dst (trying a bit harder if want_carry is true). If false is + * returned, decode->cc_dst is uninitialized and the carry is only available + * as bit 0 of decode->cc_src. + */ +static bool gen_eflags_adcox(DisasContext *s, X86DecodedInsn *decode, bool want_carry, bool need_flags) +{ + bool got_cf = false; + bool got_of = false; + + decode->cc_dst = tcg_temp_new(); + decode->cc_src = tcg_temp_new(); + decode->cc_src2 = tcg_temp_new(); + decode->cc_op = CC_OP_ADCOX; + + /* A lot more cc_ops could be "optimized" to avoid the extracts at + * the end (INC/DEC, BMILG, MUL), but they are all really unlikely + * to be followed by rotations within the same basic block. + */ + switch (s->cc_op) { + case CC_OP_ADCOX: + /* No need to compute the full EFLAGS, CF/OF are already isolated. */ + tcg_gen_mov_tl(decode->cc_src, cpu_cc_src); + if (need_flags) { + tcg_gen_mov_tl(decode->cc_src2, cpu_cc_src2); + got_of = true; + } + if (want_carry || need_flags) { + tcg_gen_mov_tl(decode->cc_dst, cpu_cc_dst); + got_cf = true; + } + break; + + case CC_OP_LOGICB ... 
CC_OP_LOGICQ: + /* CF and OF are zero, do it just because it's easy. */ + gen_mov_eflags(s, decode->cc_src); + if (need_flags) { + tcg_gen_movi_tl(decode->cc_src2, 0); + got_of = true; + } + if (want_carry || need_flags) { + tcg_gen_movi_tl(decode->cc_dst, 0); + got_cf = true; + } + break; + + case CC_OP_SARB ... CC_OP_SARQ: + /* + * SHR/RCR/SHR/RCR/... is a relatively common occurrence of RCR. + * By computing CF without using eflags, the calls to cc_compute_all + * can be eliminated as dead code (except for the last RCR). + */ + if (want_carry || need_flags) { + tcg_gen_andi_tl(decode->cc_dst, cpu_cc_src, 1); + got_cf = true; + } + gen_mov_eflags(s, decode->cc_src); + break; + + case CC_OP_SHLB ... CC_OP_SHLQ: + /* + * Likewise for SHL/RCL/SHL/RCL/... but, if CF is not in the sign + * bit, we might as well fish CF out of EFLAGS and save a shift. + */ + if (want_carry && (!need_flags || s->cc_op == CC_OP_SHLB + MO_TL)) { + tcg_gen_shri_tl(decode->cc_dst, cpu_cc_src, (8 << (s->cc_op - CC_OP_SHLB)) - 1); + got_cf = true; + } + gen_mov_eflags(s, decode->cc_src); + break; + + default: + gen_mov_eflags(s, decode->cc_src); + break; + } + + if (need_flags) { + /* If the flags could be left unmodified, always load them. */ + if (!got_of) { + tcg_gen_extract_tl(decode->cc_src2, decode->cc_src, ctz32(CC_O), 1); + got_of = true; + } + if (!got_cf) { + tcg_gen_extract_tl(decode->cc_dst, decode->cc_src, ctz32(CC_C), 1); + got_cf = true; + } + } + return got_cf; +} + +static void gen_rot_overflow(X86DecodedInsn *decode, TCGv result, TCGv old, + bool can_be_zero, TCGv count) +{ + MemOp ot = decode->op[0].ot; + TCGv temp = can_be_zero ? tcg_temp_new() : decode->cc_src2; + + tcg_gen_xor_tl(temp, old, result); + tcg_gen_extract_tl(temp, temp, (8 << ot) - 1, 1); + if (can_be_zero) { + tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src2, count, tcg_constant_tl(0), + decode->cc_src2, temp); + } +} + +/* + * RCx operations are invariant modulo 8*operand_size+1. For 8 and 16-bit operands, + * this is less than 0x1f (the mask applied by gen_shift_count) so reduce further. + */ +static void gen_rotc_mod(MemOp ot, TCGv count) +{ + TCGv temp; + + switch (ot) { + case MO_8: + temp = tcg_temp_new(); + tcg_gen_subi_tl(temp, count, 18); + tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count); + tcg_gen_subi_tl(temp, count, 9); + tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count); + break; + + case MO_16: + temp = tcg_temp_new(); + tcg_gen_subi_tl(temp, count, 17); + tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, count); + break; + + default: + break; + } +} + +/* + * The idea here is that the bit to the right of the new bit 0 is the + * new carry, and the bit to the right of the old bit 0 is the old carry. + * Just like a regular rotation, the result of the rotation is composed + * from a right shifted part and a left shifted part of s->T0. The new carry + * is extracted from the right-shifted portion, and the old carry is + * inserted at the end of the left-shifted portion. + * + * Because of the separate shifts involving the carry, gen_RCL and gen_RCR + * mostly operate on count-1. This also comes in handy when computing + * length - count, because (length-1) - (count-1) can be computed with + * a XOR, and that is commutative unlike subtraction. 
+ */ +static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool have_1bit_cin, can_be_zero; + TCGv count; + TCGLabel *zero_label = NULL; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + TCGv low, high, low_count; + + if (!count) { + return; + } + + low = tcg_temp_new(); + high = tcg_temp_new(); + low_count = tcg_temp_new(); + + gen_rotc_mod(ot, count); + have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero); + if (can_be_zero) { + zero_label = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, count, 0, zero_label); + } + + /* Compute high part, including incoming carry. */ + if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) { + /* high = (T0 << 1) | cin */ + TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src; + tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1); + } else { + /* Same as above but without deposit; cin in cc_dst. */ + tcg_gen_add_tl(high, s->T0, decode->cc_dst); + tcg_gen_add_tl(high, high, s->T0); + } + tcg_gen_subi_tl(count, count, 1); + tcg_gen_shl_tl(high, high, count); + + /* Compute low part and outgoing carry, incoming s->T0 is zero extended */ + tcg_gen_xori_tl(low_count, count, (8 << ot) - 1); /* LENGTH - 1 - (count - 1) */ + tcg_gen_shr_tl(low, s->T0, low_count); + tcg_gen_andi_tl(decode->cc_dst, low, 1); + tcg_gen_shri_tl(low, low, 1); + + /* Compute result and outgoing overflow */ + tcg_gen_mov_tl(decode->cc_src2, s->T0); + tcg_gen_or_tl(s->T0, low, high); + gen_rot_overflow(decode, s->T0, decode->cc_src2, false, NULL); + + if (zero_label) { + gen_set_label(zero_label); + } +} + +static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool have_1bit_cin, can_be_zero; + TCGv count; + TCGLabel *zero_label = NULL; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + TCGv low, high, high_count; + + if (!count) { + return; + } + + low = tcg_temp_new(); + high = tcg_temp_new(); + high_count = tcg_temp_new(); + + gen_rotc_mod(ot, count); + have_1bit_cin = gen_eflags_adcox(s, decode, true, can_be_zero); + if (can_be_zero) { + zero_label = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, count, 0, zero_label); + } + + /* Save incoming carry into high, it will be shifted later. */ + if (!have_1bit_cin || TCG_TARGET_deposit_tl_valid(1, TARGET_LONG_BITS - 1)) { + TCGv cin = have_1bit_cin ? decode->cc_dst : decode->cc_src; + tcg_gen_deposit_tl(high, cin, s->T0, 1, TARGET_LONG_BITS - 1); + } else { + /* Same as above but without deposit; cin in cc_dst. */ + tcg_gen_add_tl(high, s->T0, decode->cc_dst); + tcg_gen_add_tl(high, high, s->T0); + } + + /* Compute low part and outgoing carry, incoming s->T0 is zero extended */ + tcg_gen_subi_tl(count, count, 1); + tcg_gen_shr_tl(low, s->T0, count); + tcg_gen_andi_tl(decode->cc_dst, low, 1); + tcg_gen_shri_tl(low, low, 1); + + /* Move high part to the right position */ + tcg_gen_xori_tl(high_count, count, (8 << ot) - 1); /* LENGTH - 1 - (count - 1) */ + tcg_gen_shl_tl(high, high, high_count); + + /* Compute result and outgoing overflow */ + tcg_gen_mov_tl(decode->cc_src2, s->T0); + tcg_gen_or_tl(s->T0, low, high); + gen_rot_overflow(decode, s->T0, decode->cc_src2, false, NULL); + + if (zero_label) { + gen_set_label(zero_label); + } +} + +static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int16_t adjust = decode->e.op2 == X86_TYPE_I ? 
decode->immediate : 0; + + MemOp ot = gen_pop_T0(s); + gen_stack_update(s, adjust + (1 << ot)); + gen_op_jmp_v(s, s->T0); + gen_bnd_jmp(s); + s->base.is_jmp = DISAS_JUMP; +} + +static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0; + + if (!PE(s) || VM86(s)) { + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], 0); + /* pop offset */ + gen_op_ld_v(s, s->dflag, s->T0, s->A0); + /* NOTE: keeping EIP updated is not a problem in case of + exception */ + gen_op_jmp_v(s, s->T0); + /* pop selector */ + gen_add_A0_im(s, 1 << s->dflag); + gen_op_ld_v(s, s->dflag, s->T0, s->A0); + gen_op_movl_seg_real(s, R_CS, s->T0); + /* add stack offset */ + gen_stack_update(s, adjust + (2 << s->dflag)); + } else { + gen_update_cc_op(s); + gen_update_eip_cur(s); + gen_helper_lret_protected(tcg_env, tcg_constant_i32(s->dflag - 1), + tcg_constant_i32(adjust)); + } + s->base.is_jmp = DISAS_EOB_ONLY; +} + +/* + * Return non-NULL if a 32-bit rotate works, after possibly replicating the input. + * The input has already been zero-extended upon operand decode. + */ +static TCGv_i32 gen_rot_replicate(MemOp ot, TCGv in) +{ + TCGv_i32 temp; + switch (ot) { + case MO_8: + temp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(temp, in); + tcg_gen_muli_i32(temp, temp, 0x01010101); + return temp; + + case MO_16: + temp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(temp, in); + tcg_gen_deposit_i32(temp, temp, temp, 16, 16); + return temp; + +#ifdef TARGET_X86_64 + case MO_32: + temp = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(temp, in); + return temp; +#endif + + default: + return NULL; + } +} + +static void gen_rot_carry(X86DecodedInsn *decode, TCGv result, + bool can_be_zero, TCGv count, int bit) +{ + if (!can_be_zero) { + tcg_gen_extract_tl(decode->cc_dst, result, bit, 1); + } else { + TCGv temp = tcg_temp_new(); + tcg_gen_extract_tl(temp, result, bit, 1); + tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0), + decode->cc_dst, temp); + } +} + +static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + TCGv_i32 temp32, count32; + TCGv old = tcg_temp_new(); + + if (!count) { + return; + } + + gen_eflags_adcox(s, decode, false, can_be_zero); + tcg_gen_mov_tl(old, s->T0); + temp32 = gen_rot_replicate(ot, s->T0); + if (temp32) { + count32 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(count32, count); + tcg_gen_rotl_i32(temp32, temp32, count32); + /* Zero extend to facilitate later optimization. */ + tcg_gen_extu_i32_tl(s->T0, temp32); + } else { + tcg_gen_rotl_tl(s->T0, s->T0, count); + } + gen_rot_carry(decode, s->T0, can_be_zero, count, 0); + gen_rot_overflow(decode, s->T0, old, can_be_zero, count); +} + +static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + TCGv_i32 temp32, count32; + TCGv old = tcg_temp_new(); + + if (!count) { + return; + } + + gen_eflags_adcox(s, decode, false, can_be_zero); + tcg_gen_mov_tl(old, s->T0); + temp32 = gen_rot_replicate(ot, s->T0); + if (temp32) { + count32 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(count32, count); + tcg_gen_rotr_i32(temp32, temp32, count32); + /* Zero extend to facilitate later optimization. 
*/ + tcg_gen_extu_i32_tl(s->T0, temp32); + gen_rot_carry(decode, s->T0, can_be_zero, count, 31); + } else { + tcg_gen_rotr_tl(s->T0, s->T0, count); + gen_rot_carry(decode, s->T0, can_be_zero, count, TARGET_LONG_BITS - 1); + } + gen_rot_overflow(decode, s->T0, old, can_be_zero, count); +} + static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1915,6 +3222,76 @@ static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) } } +static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) { + return gen_illegal_opcode(s); + } + tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8); + gen_compute_eflags(s); + tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); + tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C); + tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0); +} + +static void gen_SALC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_compute_eflags_c(s, s->T0); + tcg_gen_neg_tl(s->T0, s->T0); +} + +static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCGv count, CCOp cc_op) +{ + TCGv_i32 count32 = tcg_temp_new_i32(); + TCGv_i32 old_cc_op; + + decode->cc_op = CC_OP_DYNAMIC; + decode->cc_op_dynamic = tcg_temp_new_i32(); + + assert(decode->cc_dst == s->T0); + if (cc_op_live[s->cc_op] & USES_CC_DST) { + decode->cc_dst = tcg_temp_new(); + tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_dst, count, tcg_constant_tl(0), + cpu_cc_dst, s->T0); + } + + if (cc_op_live[s->cc_op] & USES_CC_SRC) { + tcg_gen_movcond_tl(TCG_COND_EQ, decode->cc_src, count, tcg_constant_tl(0), + cpu_cc_src, decode->cc_src); + } + + tcg_gen_trunc_tl_i32(count32, count); + if (s->cc_op == CC_OP_DYNAMIC) { + old_cc_op = cpu_cc_op; + } else { + old_cc_op = tcg_constant_i32(s->cc_op); + } + tcg_gen_movcond_i32(TCG_COND_EQ, decode->cc_op_dynamic, count32, tcg_constant_i32(0), + old_cc_op, tcg_constant_i32(cc_op)); +} + +static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + + if (!count) { + return; + } + + decode->cc_dst = s->T0; + decode->cc_src = tcg_temp_new(); + tcg_gen_subi_tl(decode->cc_src, count, 1); + tcg_gen_sar_tl(decode->cc_src, s->T0, decode->cc_src); + tcg_gen_sar_tl(s->T0, s->T0, count); + if (can_be_zero) { + gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot); + } else { + decode->cc_op = CC_OP_SARB + ot; + } +} + static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1925,6 +3302,43 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_sar_tl(s->T0, s->T0, s->T1); } +static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv c_in = tcg_temp_new(); + + gen_compute_eflags_c(s, c_in); + if (s->prefix & PREFIX_LOCK) { + tcg_gen_add_tl(s->T0, s->T1, c_in); + tcg_gen_neg_tl(s->T0, s->T0); + tcg_gen_atomic_add_fetch_tl(s->T0, s->A0, s->T0, + s->mem_index, ot | MO_LE); + } else { + /* + * TODO: SBB reg, reg could use gen_prepare_eflags_c followed by + * negsetcond, and CC_OP_SUBB as the cc_op. 
+ */ + tcg_gen_sub_tl(s->T0, s->T0, s->T1); + tcg_gen_sub_tl(s->T0, s->T0, c_in); + } + prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in); +} + +static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz_nz(s, ot, gen_scas); + } else { + gen_scas(s, ot); + } +} + +static void gen_SETcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_setcc1(s, decode->b & 0xf, s->T0); +} + static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2); @@ -1979,6 +3393,28 @@ static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *d gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1); } +static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + + if (!count) { + return; + } + + decode->cc_dst = s->T0; + decode->cc_src = tcg_temp_new(); + tcg_gen_subi_tl(decode->cc_src, count, 1); + tcg_gen_shl_tl(decode->cc_src, s->T0, decode->cc_src); + tcg_gen_shl_tl(s->T0, s->T0, count); + if (can_be_zero) { + gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot); + } else { + decode->cc_op = CC_OP_SHLB + ot; + } +} + static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1989,6 +3425,28 @@ static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_shl_tl(s->T0, s->T0, s->T1); } +static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + bool can_be_zero; + TCGv count; + MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count); + + if (!count) { + return; + } + + decode->cc_dst = s->T0; + decode->cc_src = tcg_temp_new(); + tcg_gen_subi_tl(decode->cc_src, count, 1); + tcg_gen_shr_tl(decode->cc_src, s->T0, decode->cc_src); + tcg_gen_shr_tl(s->T0, s->T0, count); + if (can_be_zero) { + gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot); + } else { + decode->cc_op = CC_OP_SARB + ot; + } +} + static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1999,6 +3457,23 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) tcg_gen_shr_tl(s->T0, s->T0, s->T1); } +static void gen_STC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_compute_eflags(s); + tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); +} + +static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_st_i32(tcg_constant_i32(-1), tcg_env, offsetof(CPUX86State, df)); +} + +static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_set_eflags(s, IF_MASK); + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; +} + static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -2012,6 +3487,37 @@ static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr)); } +static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + gen_repz(s, ot, gen_stos); + } else { + gen_stos(s, ot); + } +} + +static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot 
= decode->op[1].ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_neg_tl(s->T0, s->T1); + tcg_gen_atomic_fetch_add_tl(s->cc_srcT, s->A0, s->T0, + s->mem_index, ot | MO_LE); + tcg_gen_sub_tl(s->T0, s->cc_srcT, s->T1); + } else { + tcg_gen_mov_tl(s->cc_srcT, s->T0); + tcg_gen_sub_tl(s->T0, s->T0, s->T1); + } + prepare_update2_cc(decode, s, CC_OP_SUBB + ot); +} + +static void gen_UD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_illegal_opcode(s); +} + static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { assert(!s->vex_l); @@ -2083,7 +3589,7 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) SSEFunc_0_epp fn; fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss; fn(tcg_env, OP_PTR1, OP_PTR2); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -2470,7 +3976,7 @@ static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode SSEFunc_0_epp fn; fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss; fn(tcg_env, OP_PTR1, OP_PTR2); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) @@ -2491,3 +3997,69 @@ static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *de tcg_gen_gvec_dup_imm(MO_64, offset, 16, 16, 0); } } + +static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) { + gen_NM_exception(s); + } else { + /* needs to be treated as I/O because of ferr_irq */ + translator_io_start(&s->base); + gen_helper_fwait(tcg_env); + } +} + +static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (decode->b == 0x90 && !REX_B(s)) { + if (s->prefix & PREFIX_REPZ) { + gen_update_cc_op(s); + gen_update_eip_cur(s); + gen_helper_pause(tcg_env, cur_insn_len_i32(s)); + s->base.is_jmp = DISAS_NORETURN; + } + /* No writeback. */ + decode->op[0].unit = X86_OP_SKIP; + return; + } + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1, + s->mem_index, decode->op[0].ot | MO_LE); + /* now store old value into register operand */ + gen_op_mov_reg_v(s, decode->op[2].ot, decode->op[2].n, s->T0); + } else { + /* move destination value into source operand, source preserved in T1 */ + gen_op_mov_reg_v(s, decode->op[2].ot, decode->op[2].n, s->T0); + tcg_gen_mov_tl(s->T0, s->T1); + } +} + +static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + /* AL is already zero-extended into s->T0. 
*/ + tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0); + gen_lea_v_seg(s, s->A0, R_DS, s->override); + gen_op_ld_v(s, MO_8, s->T0, s->A0); +} + +static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + /* special case XOR reg, reg */ + if (decode->op[1].unit == X86_OP_INT && + decode->op[2].unit == X86_OP_INT && + decode->op[1].n == decode->op[2].n) { + tcg_gen_movi_tl(s->T0, 0); + decode->cc_op = CC_OP_CLR; + } else { + MemOp ot = decode->op[1].ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T1, + s->mem_index, ot | MO_LE); + } else { + tcg_gen_xor_tl(s->T0, s->T0, s->T1); + } + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); + } +} diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index 4b965a5d6c..e322293371 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -21,11 +21,13 @@ #include <math.h> #include "cpu.h" #include "tcg-cpu.h" +#include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "fpu/softfloat-macros.h" #include "helper-tcg.h" +#include "access.h" /* float macros */ #define FT0 (env->ft0) @@ -83,23 +85,22 @@ static inline void fpop(CPUX86State *env) env->fpstt = (env->fpstt + 1) & 7; } -static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) +static floatx80 do_fldt(X86Access *ac, target_ulong ptr) { CPU_LDoubleU temp; - temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); - temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); + temp.l.lower = access_ldq(ac, ptr); + temp.l.upper = access_ldw(ac, ptr + 8); return temp.d; } -static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, - uintptr_t retaddr) +static void do_fstt(X86Access *ac, target_ulong ptr, floatx80 f) { CPU_LDoubleU temp; temp.d = f; - cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); - cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); + access_stq(ac, ptr, temp.l.lower); + access_stw(ac, ptr + 8, temp.l.upper); } /* x87 FPU helpers */ @@ -381,16 +382,22 @@ int64_t helper_fisttll_ST0(CPUX86State *env) void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) { int new_fpstt; + X86Access ac; + + access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); + env->fpregs[new_fpstt].d = do_fldt(&ac, ptr); env->fpstt = new_fpstt; env->fptags[new_fpstt] = 0; /* validate stack entry */ } void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) { - do_fstt(env, ST0, ptr, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); + do_fstt(&ac, ptr, ST0); } void helper_fpush(CPUX86State *env) @@ -486,6 +493,7 @@ void helper_fcomi_ST0_FT0(CPUX86State *env) ret = floatx80_compare(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); CC_SRC = eflags | fcomi_ccval[ret + 1]; + CC_OP = CC_OP_EFLAGS; merge_exception_flags(env, old_flags); } @@ -498,6 +506,7 @@ void helper_fucomi_ST0_FT0(CPUX86State *env) ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); CC_SRC = eflags | fcomi_ccval[ret + 1]; + CC_OP = CC_OP_EFLAGS; merge_exception_flags(env, old_flags); } @@ -766,18 +775,21 @@ void helper_fninit(CPUX86State *env) void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) { + X86Access ac; floatx80 tmp; uint64_t val; unsigned int v; int i; + access_prepare(&ac, env, ptr, 10, MMU_DATA_LOAD, GETPC()); + val = 
0; for (i = 8; i >= 0; i--) { - v = cpu_ldub_data_ra(env, ptr + i, GETPC()); + v = access_ldb(&ac, ptr + i); val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); } tmp = int64_to_floatx80(val, &env->fp_status); - if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { + if (access_ldb(&ac, ptr + 9) & 0x80) { tmp = floatx80_chs(tmp); } fpush(env); @@ -791,7 +803,9 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) target_ulong mem_ref, mem_end; int64_t val; CPU_LDoubleU temp; + X86Access ac; + access_prepare(&ac, env, ptr, 10, MMU_DATA_STORE, GETPC()); temp.d = ST0; val = floatx80_to_int64(ST0, &env->fp_status); @@ -799,20 +813,20 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { set_float_exception_flags(float_flag_invalid, &env->fp_status); while (mem_ref < ptr + 7) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + access_stb(&ac, mem_ref++, 0); } - cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); - cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); - cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + access_stb(&ac, mem_ref++, 0xc0); + access_stb(&ac, mem_ref++, 0xff); + access_stb(&ac, mem_ref++, 0xff); merge_exception_flags(env, old_flags); return; } mem_end = mem_ref + 9; if (SIGND(temp)) { - cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); + access_stb(&ac, mem_end, 0x80); val = -val; } else { - cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); + access_stb(&ac, mem_end, 0x00); } while (mem_ref < mem_end) { if (val == 0) { @@ -821,10 +835,10 @@ void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) v = val % 100; val = val / 100; v = ((v / 10) << 4) | (v % 10); - cpu_stb_data_ra(env, mem_ref++, v, GETPC()); + access_stb(&ac, mem_ref++, v); } while (mem_ref < mem_end) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + access_stb(&ac, mem_ref++, 0); } merge_exception_flags(env, old_flags); } @@ -2361,9 +2375,9 @@ void helper_fxam_ST0(CPUX86State *env) } } -static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fstenv(X86Access *ac, target_ulong ptr, int data32) { + CPUX86State *env = ac->env; int fpus, fptag, exp, i; uint64_t mant; CPU_LDoubleU tmp; @@ -2390,28 +2404,31 @@ static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, } if (data32) { /* 32 bit */ - cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); - cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); - cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ - cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ - cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ - cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ + access_stl(ac, ptr, env->fpuc); + access_stl(ac, ptr + 4, fpus); + access_stl(ac, ptr + 8, fptag); + access_stl(ac, ptr + 12, env->fpip); /* fpip */ + access_stl(ac, ptr + 16, env->fpcs); /* fpcs */ + access_stl(ac, ptr + 20, env->fpdp); /* fpoo */ + access_stl(ac, ptr + 24, env->fpds); /* fpos */ } else { /* 16 bit */ - cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); - cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); - cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); - cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); - cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); - cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); + access_stw(ac, ptr, env->fpuc); + access_stw(ac, ptr + 2, fpus); + access_stw(ac, ptr + 4, fptag); + access_stw(ac, ptr + 6, 
env->fpip); + access_stw(ac, ptr + 8, env->fpcs); + access_stw(ac, ptr + 10, env->fpdp); + access_stw(ac, ptr + 12, env->fpds); } } void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) { - do_fstenv(env, ptr, data32, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_STORE, GETPC()); + do_fstenv(&ac, ptr, data32); } static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) @@ -2430,20 +2447,15 @@ static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) #endif } -static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fldenv(X86Access *ac, target_ulong ptr, int data32) { int i, fpus, fptag; + CPUX86State *env = ac->env; + + cpu_set_fpuc(env, access_ldw(ac, ptr)); + fpus = access_ldw(ac, ptr + (2 << data32)); + fptag = access_ldw(ac, ptr + (4 << data32)); - if (data32) { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); - } else { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); - } cpu_set_fpus(env, fpus); for (i = 0; i < 8; i++) { env->fptags[i] = ((fptag & 3) == 3); @@ -2453,21 +2465,22 @@ static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) { - do_fldenv(env, ptr, data32, GETPC()); + X86Access ac; + + access_prepare(&ac, env, ptr, 14 << data32, MMU_DATA_LOAD, GETPC()); + do_fldenv(&ac, ptr, data32); } -static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_fsave(X86Access *ac, target_ulong ptr, int data32) { - floatx80 tmp; - int i; + CPUX86State *env = ac->env; - do_fstenv(env, ptr, data32, retaddr); + do_fstenv(ac, ptr, data32); + ptr += 14 << data32; - ptr += (target_ulong)14 << data32; - for (i = 0; i < 8; i++) { - tmp = ST(i); - do_fstt(env, tmp, ptr, retaddr); + for (int i = 0; i < 8; i++) { + floatx80 tmp = ST(i); + do_fstt(ac, ptr, tmp); ptr += 10; } @@ -2476,20 +2489,22 @@ static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) { - do_fsave(env, ptr, data32, GETPC()); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, GETPC()); + do_fsave(&ac, ptr, data32); } -static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) +static void do_frstor(X86Access *ac, target_ulong ptr, int data32) { - floatx80 tmp; - int i; + CPUX86State *env = ac->env; - do_fldenv(env, ptr, data32, retaddr); - ptr += (target_ulong)14 << data32; + do_fldenv(ac, ptr, data32); + ptr += 14 << data32; - for (i = 0; i < 8; i++) { - tmp = do_fldt(env, ptr, retaddr); + for (int i = 0; i < 8; i++) { + floatx80 tmp = do_fldt(ac, ptr); ST(i) = tmp; ptr += 10; } @@ -2497,13 +2512,18 @@ static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) { - do_frstor(env, ptr, data32, GETPC()); + int size = (14 << data32) + 80; + X86Access ac; + + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, GETPC()); + do_frstor(&ac, ptr, data32); } #define XO(X) offsetof(X86XSaveArea, X) -static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_fpu(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; 
int fpus, fptag, i; target_ulong addr; @@ -2513,33 +2533,37 @@ static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) fptag |= (env->fptags[i] << i); } - cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); + access_stw(ac, ptr + XO(legacy.fcw), env->fpuc); + access_stw(ac, ptr + XO(legacy.fsw), fpus); + access_stw(ac, ptr + XO(legacy.ftw), fptag ^ 0xff); /* In 32-bit mode this is eip, sel, dp, sel. In 64-bit mode this is rip, rdp. But in either case we don't write actual data, just zeros. */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + access_stq(ac, ptr + XO(legacy.fpip), 0); /* eip+sel; rip */ + access_stq(ac, ptr + XO(legacy.fpdp), 0); /* edp+sel; rdp */ addr = ptr + XO(legacy.fpregs); + for (i = 0; i < 8; i++) { floatx80 tmp = ST(i); - do_fstt(env, tmp, addr, ra); + do_fstt(ac, addr, tmp); addr += 16; } } -static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_mxcsr(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + update_mxcsr_from_sse_status(env); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); + access_stl(ac, ptr + XO(legacy.mxcsr), env->mxcsr); + access_stl(ac, ptr + XO(legacy.mxcsr_mask), 0x0000ffff); } -static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_sse(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; target_ulong addr; @@ -2551,14 +2575,15 @@ static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { - cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); - cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); + access_stq(ac, addr, env->xmm_regs[i].ZMM_Q(0)); + access_stq(ac, addr + 8, env->xmm_regs[i].ZMM_Q(1)); addr += 16; } } -static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_ymmh(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; if (env->hflags & HF_CS64_MASK) { @@ -2568,58 +2593,67 @@ static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) } for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { - cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); - cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); + access_stq(ac, ptr, env->xmm_regs[i].ZMM_Q(2)); + access_stq(ac, ptr + 8, env->xmm_regs[i].ZMM_Q(3)); } } -static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_bndregs(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { - cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); - cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); + access_stq(ac, addr, env->bnd_regs[i].lb); + access_stq(ac, addr + 8, env->bnd_regs[i].ub); } } -static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_bndcsr(X86Access *ac, target_ulong ptr) { - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), - env->bndcs_regs.cfgu, ra); - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), - 
env->bndcs_regs.sts, ra); + CPUX86State *env = ac->env; + + access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), + env->bndcs_regs.cfgu); + access_stq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), + env->bndcs_regs.sts); } -static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xsave_pkru(X86Access *ac, target_ulong ptr) { - cpu_stq_data_ra(env, ptr, env->pkru, ra); + access_stq(ac, ptr, ac->env->pkru); } -static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_fxsave(X86Access *ac, target_ulong ptr) { - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - do_xsave_fpu(env, ptr, ra); + CPUX86State *env = ac->env; + do_xsave_fpu(ac, ptr); if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xsave_mxcsr(env, ptr, ra); + do_xsave_mxcsr(ac, ptr); /* Fast FXSAVE leaves out the XMM registers */ if (!(env->efer & MSR_EFER_FFXSR) || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { - do_xsave_sse(env, ptr, ra); + do_xsave_sse(ac, ptr); } } } void helper_fxsave(CPUX86State *env, target_ulong ptr) { - do_fxsave(env, ptr, GETPC()); + uintptr_t ra = GETPC(); + X86Access ac; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_STORE, ra); + do_fxsave(&ac, ptr); } static uint64_t get_xinuse(CPUX86State *env) @@ -2636,57 +2670,73 @@ static uint64_t get_xinuse(CPUX86State *env) return inuse; } -static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, - uint64_t inuse, uint64_t opt, uintptr_t ra) +static void do_xsave_access(X86Access *ac, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt) { uint64_t old_bv, new_bv; - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - /* Never save anything not enabled by XCR0. */ - rfbm &= env->xcr0; - opt &= rfbm; - if (opt & XSTATE_FP_MASK) { - do_xsave_fpu(env, ptr, ra); + do_xsave_fpu(ac, ptr); } if (rfbm & XSTATE_SSE_MASK) { /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ - do_xsave_mxcsr(env, ptr, ra); + do_xsave_mxcsr(ac, ptr); } if (opt & XSTATE_SSE_MASK) { - do_xsave_sse(env, ptr, ra); + do_xsave_sse(ac, ptr); } if (opt & XSTATE_YMM_MASK) { - do_xsave_ymmh(env, ptr + XO(avx_state), ra); + do_xsave_ymmh(ac, ptr + XO(avx_state)); } if (opt & XSTATE_BNDREGS_MASK) { - do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); + do_xsave_bndregs(ac, ptr + XO(bndreg_state)); } if (opt & XSTATE_BNDCSR_MASK) { - do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); + do_xsave_bndcsr(ac, ptr + XO(bndcsr_state)); } if (opt & XSTATE_PKRU_MASK) { - do_xsave_pkru(env, ptr + XO(pkru_state), ra); + do_xsave_pkru(ac, ptr + XO(pkru_state)); } /* Update the XSTATE_BV field. */ - old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); + old_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); new_bv = (old_bv & ~rfbm) | (inuse & rfbm); - cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); + access_stq(ac, ptr + XO(header.xstate_bv), new_bv); +} + +static void do_xsave_chk(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + /* The OS must have enabled XSAVE. 
*/ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. */ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } +} + +static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt, uintptr_t ra) +{ + X86Access ac; + unsigned size; + + do_xsave_chk(env, ptr, ra); + + /* Never save anything not enabled by XCR0. */ + rfbm &= env->xcr0; + opt &= rfbm; + size = xsave_area_size(opt, false); + + access_prepare(&ac, env, ptr, size, MMU_DATA_STORE, ra); + do_xsave_access(&ac, ptr, rfbm, inuse, opt); } void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); + do_xsave(env, ptr, rfbm, get_xinuse(env), rfbm, GETPC()); } void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) @@ -2695,36 +2745,41 @@ void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); } -static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_fpu(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, fpuc, fpus, fptag; target_ulong addr; - fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); - fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); - fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); + fpuc = access_ldw(ac, ptr + XO(legacy.fcw)); + fpus = access_ldw(ac, ptr + XO(legacy.fsw)); + fptag = access_ldw(ac, ptr + XO(legacy.ftw)); cpu_set_fpuc(env, fpuc); cpu_set_fpus(env, fpus); + fptag ^= 0xff; for (i = 0; i < 8; i++) { env->fptags[i] = ((fptag >> i) & 1); } addr = ptr + XO(legacy.fpregs); + for (i = 0; i < 8; i++) { - floatx80 tmp = do_fldt(env, addr, ra); + floatx80 tmp = do_fldt(ac, addr); ST(i) = tmp; addr += 16; } } -static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_mxcsr(X86Access *ac, target_ulong ptr) { - cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); + CPUX86State *env = ac->env; + cpu_set_mxcsr(env, access_ldl(ac, ptr + XO(legacy.mxcsr))); } -static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_sse(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; target_ulong addr; @@ -2736,8 +2791,8 @@ static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) addr = ptr + XO(legacy.xmm_regs); for (i = 0; i < nb_xmm_regs; i++) { - env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); - env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); + env->xmm_regs[i].ZMM_Q(0) = access_ldq(ac, addr); + env->xmm_regs[i].ZMM_Q(1) = access_ldq(ac, addr + 8); addr += 16; } } @@ -2758,8 +2813,9 @@ static void do_clear_sse(CPUX86State *env) } } -static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_ymmh(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; int i, nb_xmm_regs; if (env->hflags & HF_CS64_MASK) { @@ -2769,8 +2825,8 @@ static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) } for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { - env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); - env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); + env->xmm_regs[i].ZMM_Q(2) = access_ldq(ac, ptr); + env->xmm_regs[i].ZMM_Q(3) = access_ldq(ac, ptr + 8); } } @@ -2790,100 +2846,97 @@ static void do_clear_ymmh(CPUX86State 
*env) } } -static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_bndregs(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); int i; for (i = 0; i < 4; i++, addr += 16) { - env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); - env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); + env->bnd_regs[i].lb = access_ldq(ac, addr); + env->bnd_regs[i].ub = access_ldq(ac, addr + 8); } } -static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_bndcsr(X86Access *ac, target_ulong ptr) { + CPUX86State *env = ac->env; + /* FIXME: Extend highest implemented bit of linear address. */ env->bndcs_regs.cfgu - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); + = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu)); env->bndcs_regs.sts - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); + = access_ldq(ac, ptr + offsetof(XSaveBNDCSR, bndcsr.sts)); } -static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_xrstor_pkru(X86Access *ac, target_ulong ptr) { - env->pkru = cpu_ldq_data_ra(env, ptr, ra); + ac->env->pkru = access_ldq(ac, ptr); } -static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) +static void do_fxrstor(X86Access *ac, target_ulong ptr) { - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - do_xrstor_fpu(env, ptr, ra); + CPUX86State *env = ac->env; + do_xrstor_fpu(ac, ptr); if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xrstor_mxcsr(env, ptr, ra); + do_xrstor_mxcsr(ac, ptr); /* Fast FXRSTOR leaves out the XMM registers */ if (!(env->efer & MSR_EFER_FFXSR) || (env->hflags & HF_CPL_MASK) || !(env->hflags & HF_LMA_MASK)) { - do_xrstor_sse(env, ptr, ra); + do_xrstor_sse(ac, ptr); } } } void helper_fxrstor(CPUX86State *env, target_ulong ptr) { - do_fxrstor(env, ptr, GETPC()); + uintptr_t ra = GETPC(); + X86Access ac; + + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + access_prepare(&ac, env, ptr, sizeof(X86LegacyXSaveArea), + MMU_DATA_LOAD, ra); + do_fxrstor(&ac, ptr); } -static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) +static bool valid_xrstor_header(X86Access *ac, uint64_t *pxsbv, + target_ulong ptr) { uint64_t xstate_bv, xcomp_bv, reserve0; - rfbm &= env->xcr0; + xstate_bv = access_ldq(ac, ptr + XO(header.xstate_bv)); + xcomp_bv = access_ldq(ac, ptr + XO(header.xcomp_bv)); + reserve0 = access_ldq(ac, ptr + XO(header.reserve0)); + *pxsbv = xstate_bv; - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); - - if ((int64_t)xstate_bv < 0) { - /* FIXME: Compact form. */ - raise_exception_ra(env, EXCP0D_GPF, ra); + /* + * XCOMP_BV bit 63 indicates compact form, which we do not support, + * and thus must raise #GP. That leaves us in standard form. + * In standard form, bytes 23:8 must be zero -- which is both + * XCOMP_BV and the following 64-bit field. + */ + if (xcomp_bv || reserve0) { + return false; } - /* Standard form. */ - /* The XSTATE_BV field must not set bits not present in XCR0. 
*/ - if (xstate_bv & ~env->xcr0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } + return (xstate_bv & ~ac->env->xcr0) == 0; +} - /* The XCOMP_BV field must be zero. Note that, as of the April 2016 - revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) - describes only XCOMP_BV, but the description of the standard form - of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which - includes the next 64-bit field. */ - xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); - reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); - if (xcomp_bv || reserve0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } +static void do_xrstor(X86Access *ac, target_ulong ptr, + uint64_t rfbm, uint64_t xstate_bv) +{ + CPUX86State *env = ac->env; if (rfbm & XSTATE_FP_MASK) { if (xstate_bv & XSTATE_FP_MASK) { - do_xrstor_fpu(env, ptr, ra); + do_xrstor_fpu(ac, ptr); } else { do_fninit(env); memset(env->fpregs, 0, sizeof(env->fpregs)); @@ -2892,23 +2945,23 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_SSE_MASK) { /* Note that the standard form of XRSTOR loads MXCSR from memory whether or not the XSTATE_BV bit is set. */ - do_xrstor_mxcsr(env, ptr, ra); + do_xrstor_mxcsr(ac, ptr); if (xstate_bv & XSTATE_SSE_MASK) { - do_xrstor_sse(env, ptr, ra); + do_xrstor_sse(ac, ptr); } else { do_clear_sse(env); } } if (rfbm & XSTATE_YMM_MASK) { if (xstate_bv & XSTATE_YMM_MASK) { - do_xrstor_ymmh(env, ptr + XO(avx_state), ra); + do_xrstor_ymmh(ac, ptr + XO(avx_state)); } else { do_clear_ymmh(env); } } if (rfbm & XSTATE_BNDREGS_MASK) { if (xstate_bv & XSTATE_BNDREGS_MASK) { - do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); + do_xrstor_bndregs(ac, ptr + XO(bndreg_state)); env->hflags |= HF_MPX_IU_MASK; } else { memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); @@ -2917,7 +2970,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr } if (rfbm & XSTATE_BNDCSR_MASK) { if (xstate_bv & XSTATE_BNDCSR_MASK) { - do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); + do_xrstor_bndcsr(ac, ptr + XO(bndcsr_state)); } else { memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); } @@ -2926,7 +2979,7 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr if (rfbm & XSTATE_PKRU_MASK) { uint64_t old_pkru = env->pkru; if (xstate_bv & XSTATE_PKRU_MASK) { - do_xrstor_pkru(env, ptr + XO(pkru_state), ra); + do_xrstor_pkru(ac, ptr + XO(pkru_state)); } else { env->pkru = 0; } @@ -2941,38 +2994,117 @@ static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) { - do_xrstor(env, ptr, rfbm, GETPC()); + uintptr_t ra = GETPC(); + X86Access ac; + uint64_t xstate_bv; + unsigned size, size_ext; + + do_xsave_chk(env, ptr, ra); + + /* Begin with just the minimum size to validate the header. */ + size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader); + access_prepare(&ac, env, ptr, size, MMU_DATA_LOAD, ra); + if (!valid_xrstor_header(&ac, &xstate_bv, ptr)) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + rfbm &= env->xcr0; + size_ext = xsave_area_size(rfbm & xstate_bv, false); + if (size < size_ext) { + /* TODO: See if existing page probe has covered extra size. 
*/ + access_prepare(&ac, env, ptr, size_ext, MMU_DATA_LOAD, ra); + } + + do_xrstor(&ac, ptr, rfbm, xstate_bv); } #if defined(CONFIG_USER_ONLY) -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) +void cpu_x86_fsave(CPUX86State *env, void *host, size_t len) { - do_fsave(env, ptr, data32, 0); + X86Access ac = { + .haddr1 = host, + .size = 4 * 7 + 8 * 10, + .env = env, + }; + + assert(ac.size <= len); + do_fsave(&ac, 0, true); } -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) +void cpu_x86_frstor(CPUX86State *env, void *host, size_t len) { - do_frstor(env, ptr, data32, 0); + X86Access ac = { + .haddr1 = host, + .size = 4 * 7 + 8 * 10, + .env = env, + }; + + assert(ac.size <= len); + do_frstor(&ac, 0, true); } -void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +void cpu_x86_fxsave(CPUX86State *env, void *host, size_t len) { - do_fxsave(env, ptr, 0); + X86Access ac = { + .haddr1 = host, + .size = sizeof(X86LegacyXSaveArea), + .env = env, + }; + + assert(ac.size <= len); + do_fxsave(&ac, 0); } -void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +void cpu_x86_fxrstor(CPUX86State *env, void *host, size_t len) { - do_fxrstor(env, ptr, 0); + X86Access ac = { + .haddr1 = host, + .size = sizeof(X86LegacyXSaveArea), + .env = env, + }; + + assert(ac.size <= len); + do_fxrstor(&ac, 0); } -void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) +void cpu_x86_xsave(CPUX86State *env, void *host, size_t len, uint64_t rfbm) { - do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); + X86Access ac = { + .haddr1 = host, + .env = env, + }; + + /* + * Since this is only called from user-level signal handling, + * we should have done the job correctly there. + */ + assert((rfbm & ~env->xcr0) == 0); + ac.size = xsave_area_size(rfbm, false); + assert(ac.size <= len); + do_xsave_access(&ac, 0, rfbm, get_xinuse(env), rfbm); } -void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) +bool cpu_x86_xrstor(CPUX86State *env, void *host, size_t len, uint64_t rfbm) { - do_xrstor(env, ptr, -1, 0); + X86Access ac = { + .haddr1 = host, + .env = env, + }; + uint64_t xstate_bv; + + /* + * Since this is only called from user-level signal handling, + * we should have done the job correctly there. 
+ */ + assert((rfbm & ~env->xcr0) == 0); + ac.size = xsave_area_size(rfbm, false); + assert(ac.size <= len); + + if (!valid_xrstor_header(&ac, &xstate_bv, 0)) { + return false; + } + do_xrstor(&ac, 0, rfbm, xstate_bv); + return true; } #endif diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index effc2c1c98..85957943bf 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -39,7 +39,7 @@ QEMU_BUILD_BUG_ON(TCG_PHYS_ADDR_BITS > TARGET_PHYS_ADDR_SPACE_BITS); */ void x86_cpu_do_interrupt(CPUState *cpu); #ifndef CONFIG_USER_ONLY -void x86_cpu_exec_halt(CPUState *cpu); +bool x86_cpu_exec_halt(CPUState *cpu); bool x86_need_replay_interrupt(int interrupt_request); bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req); #endif diff --git a/target/i386/tcg/int_helper.c b/target/i386/tcg/int_helper.c index ab85dc5540..e1f9240528 100644 --- a/target/i386/tcg/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -29,22 +29,6 @@ //#define DEBUG_MULDIV -/* modulo 9 table */ -static const uint8_t rclb_table[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 0, 1, 2, 3, 4, 5, - 6, 7, 8, 0, 1, 2, 3, 4, -}; - -/* modulo 17 table */ -static const uint8_t rclw_table[32] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 0, 1, 2, 3, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 13, 14, -}; - /* division, flags are undefined */ void helper_divb_AL(CPUX86State *env, target_ulong t0) @@ -161,27 +145,24 @@ void helper_idivl_EAX(CPUX86State *env, target_ulong t0) /* bcd */ -/* XXX: exception */ -void helper_aam(CPUX86State *env, int base) +target_ulong helper_aam(target_ulong al, target_ulong base) { - int al, ah; + int ah; - al = env->regs[R_EAX] & 0xff; + al &= 0xff; ah = al / base; al = al % base; - env->regs[R_EAX] = (env->regs[R_EAX] & ~0xffff) | al | (ah << 8); - CC_DST = al; + return al | (ah << 8); } -void helper_aad(CPUX86State *env, int base) +target_ulong helper_aad(target_ulong ax, target_ulong base) { int al, ah; - al = env->regs[R_EAX] & 0xff; - ah = (env->regs[R_EAX] >> 8) & 0xff; + al = ax & 0xff; + ah = (ax >> 8) & 0xff; al = ((ah * base) + al) & 0xff; - env->regs[R_EAX] = (env->regs[R_EAX] & ~0xffff) | al; - CC_DST = al; + return al; } void helper_aaa(CPUX86State *env) @@ -206,6 +187,7 @@ void helper_aaa(CPUX86State *env) } env->regs[R_EAX] = (env->regs[R_EAX] & ~0xffff) | al | (ah << 8); CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_aas(CPUX86State *env) @@ -230,6 +212,7 @@ void helper_aas(CPUX86State *env) } env->regs[R_EAX] = (env->regs[R_EAX] & ~0xffff) | al | (ah << 8); CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_daa(CPUX86State *env) @@ -257,6 +240,7 @@ void helper_daa(CPUX86State *env) eflags |= parity_table[al]; /* pf */ eflags |= (al & 0x80); /* sf */ CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_das(CPUX86State *env) @@ -288,6 +272,7 @@ void helper_das(CPUX86State *env) eflags |= parity_table[al]; /* pf */ eflags |= (al & 0x80); /* sf */ CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } #ifdef TARGET_X86_64 @@ -447,24 +432,6 @@ target_ulong helper_pext(target_ulong src, target_ulong mask) return dest; } -#define SHIFT 0 -#include "shift_helper_template.h.inc" -#undef SHIFT - -#define SHIFT 1 -#include "shift_helper_template.h.inc" -#undef SHIFT - -#define SHIFT 2 -#include "shift_helper_template.h.inc" -#undef SHIFT - -#ifdef TARGET_X86_64 -#define SHIFT 3 -#include "shift_helper_template.h.inc" -#undef SHIFT -#endif - /* Test that BIT is enabled in CR4. If not, raise an illegal opcode exception. 
This reduces the requirements for rare CR4 bits being mapped into HFLAGS. */ @@ -486,10 +453,11 @@ target_ulong HELPER(rdrand)(CPUX86State *env) error_free(err); /* Failure clears CF and all other flags, and returns 0. */ env->cc_src = 0; - return 0; + ret = 0; + } else { + /* Success sets CF and clears all others. */ + env->cc_src = CC_C; } - - /* Success sets CF and clears all others. */ - env->cc_src = CC_C; + env->cc_op = CC_OP_EFLAGS; return ret; } diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build index f9110e890c..1105b35d92 100644 --- a/target/i386/tcg/meson.build +++ b/target/i386/tcg/meson.build @@ -1,4 +1,5 @@ i386_ss.add(when: 'CONFIG_TCG', if_true: files( + 'access.c', 'bpt_helper.c', 'cc_helper.c', 'excp_helper.c', diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 34ccabd8ce..0301459004 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -2326,7 +2326,7 @@ void helper_verr(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env); + eflags = cpu_cc_compute_all(env) | CC_Z; if ((selector & 0xfffc) == 0) { goto fail; } @@ -2351,11 +2351,11 @@ void helper_verr(CPUX86State *env, target_ulong selector1) } else { if (dpl < cpl || dpl < rpl) { fail: - CC_SRC = eflags & ~CC_Z; - return; + eflags &= ~CC_Z; } } - CC_SRC = eflags | CC_Z; + CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } void helper_verw(CPUX86State *env, target_ulong selector1) @@ -2364,7 +2364,7 @@ void helper_verw(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env); + eflags = cpu_cc_compute_all(env) | CC_Z; if ((selector & 0xfffc) == 0) { goto fail; } @@ -2385,9 +2385,9 @@ void helper_verw(CPUX86State *env, target_ulong selector1) } if (!(e2 & DESC_W_MASK)) { fail: - CC_SRC = eflags & ~CC_Z; - return; + eflags &= ~CC_Z; } } - CC_SRC = eflags | CC_Z; + CC_SRC = eflags; + CC_OP = CC_OP_EFLAGS; } diff --git a/target/i386/tcg/shift_helper_template.h.inc b/target/i386/tcg/shift_helper_template.h.inc deleted file mode 100644 index 54f15d6e05..0000000000 --- a/target/i386/tcg/shift_helper_template.h.inc +++ /dev/null @@ -1,108 +0,0 @@ -/* - * x86 shift helpers - * - * Copyright (c) 2008 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
- */ - -#define DATA_BITS (1 << (3 + SHIFT)) -#define SHIFT_MASK (DATA_BITS - 1) -#if DATA_BITS <= 32 -#define SHIFT1_MASK 0x1f -#else -#define SHIFT1_MASK 0x3f -#endif - -#if DATA_BITS == 8 -#define SUFFIX b -#define DATA_MASK 0xff -#elif DATA_BITS == 16 -#define SUFFIX w -#define DATA_MASK 0xffff -#elif DATA_BITS == 32 -#define SUFFIX l -#define DATA_MASK 0xffffffff -#elif DATA_BITS == 64 -#define SUFFIX q -#define DATA_MASK 0xffffffffffffffffULL -#else -#error unhandled operand size -#endif - -target_ulong glue(helper_rcl, SUFFIX)(CPUX86State *env, target_ulong t0, - target_ulong t1) -{ - int count, eflags; - target_ulong src; - target_long res; - - count = t1 & SHIFT1_MASK; -#if DATA_BITS == 16 - count = rclw_table[count]; -#elif DATA_BITS == 8 - count = rclb_table[count]; -#endif - if (count) { - eflags = env->cc_src; - t0 &= DATA_MASK; - src = t0; - res = (t0 << count) | ((target_ulong)(eflags & CC_C) << (count - 1)); - if (count > 1) { - res |= t0 >> (DATA_BITS + 1 - count); - } - t0 = res; - env->cc_src = (eflags & ~(CC_C | CC_O)) | - (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | - ((src >> (DATA_BITS - count)) & CC_C); - } - return t0; -} - -target_ulong glue(helper_rcr, SUFFIX)(CPUX86State *env, target_ulong t0, - target_ulong t1) -{ - int count, eflags; - target_ulong src; - target_long res; - - count = t1 & SHIFT1_MASK; -#if DATA_BITS == 16 - count = rclw_table[count]; -#elif DATA_BITS == 8 - count = rclb_table[count]; -#endif - if (count) { - eflags = env->cc_src; - t0 &= DATA_MASK; - src = t0; - res = (t0 >> count) | - ((target_ulong)(eflags & CC_C) << (DATA_BITS - count)); - if (count > 1) { - res |= t0 << (DATA_BITS + 1 - count); - } - t0 = res; - env->cc_src = (eflags & ~(CC_C | CC_O)) | - (lshift(src ^ t0, 11 - (DATA_BITS - 1)) & CC_O) | - ((src >> (count - 1)) & CC_C); - } - return t0; -} - -#undef DATA_BITS -#undef SHIFT_MASK -#undef SHIFT1_MASK -#undef DATA_TYPE -#undef DATA_MASK -#undef SUFFIX diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c index 7a57b7dd10..8fb05b1f53 100644 --- a/target/i386/tcg/sysemu/excp_helper.c +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "tcg/helper-tcg.h" typedef struct TranslateParams { diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c index 2db8083748..9ba94deb3a 100644 --- a/target/i386/tcg/sysemu/seg_helper.c +++ b/target/i386/tcg/sysemu/seg_helper.c @@ -128,7 +128,7 @@ void x86_cpu_do_interrupt(CPUState *cs) } } -void x86_cpu_exec_halt(CPUState *cpu) +bool x86_cpu_exec_halt(CPUState *cpu) { if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -138,6 +138,7 @@ void x86_cpu_exec_halt(CPUState *cpu) cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); bql_unlock(); } + return cpu_has_work(cpu); } bool x86_need_replay_interrupt(int interrupt_request) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index c05d9e5225..6dedfe94c0 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -20,11 +20,9 @@ #include "qemu/host-utils.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" -#include "exec/cpu_ldst.h" #include "exec/translator.h" #include "fpu/softfloat.h" @@ -38,6 +36,9 @@ #include "exec/helper-info.c.inc" #undef HELPER_H +/* Fixes for Windows namespace pollution. 
*/ +#undef IN +#undef OUT #define PREFIX_REPZ 0x01 #define PREFIX_REPNZ 0x02 @@ -143,11 +144,36 @@ typedef struct DisasContext { TCGOp *prev_insn_end; } DisasContext; -#define DISAS_EOB_ONLY DISAS_TARGET_0 -#define DISAS_EOB_NEXT DISAS_TARGET_1 -#define DISAS_EOB_INHIBIT_IRQ DISAS_TARGET_2 +/* + * Point EIP to next instruction before ending translation. + * For instructions that can change hflags. + */ +#define DISAS_EOB_NEXT DISAS_TARGET_0 + +/* + * Point EIP to next instruction and set HF_INHIBIT_IRQ if not + * already set. For instructions that activate interrupt shadow. + */ +#define DISAS_EOB_INHIBIT_IRQ DISAS_TARGET_1 + +/* + * Return to the main loop; EIP might have already been updated + * but even in that case do not use lookup_and_goto_ptr(). + */ +#define DISAS_EOB_ONLY DISAS_TARGET_2 + +/* + * EIP has already been updated. For jumps that wish to use + * lookup_and_goto_ptr() + */ #define DISAS_JUMP DISAS_TARGET_3 +/* + * EIP has already been updated. Use updated value of + * EFLAGS.TF to determine singlestep trap (SYSCALL/SYSRET). + */ +#define DISAS_EOB_RECHECK_TF DISAS_TARGET_4 + /* The environment in which user-only runs is constrained. */ #ifdef CONFIG_USER_ONLY #define PE(S) true @@ -212,7 +238,6 @@ typedef struct DisasContext { #ifdef CONFIG_USER_ONLY STUB_HELPER(clgi, TCGv_env env) STUB_HELPER(flush_page, TCGv_env env, TCGv addr) -STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs) STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port) STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port) STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port) @@ -235,25 +260,10 @@ STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val) STUB_HELPER(wrmsr, TCGv_env env) #endif -static void gen_eob(DisasContext *s); -static void gen_jr(DisasContext *s); static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num); static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num); -static void gen_op(DisasContext *s1, int op, MemOp ot, int d); static void gen_exception_gpf(DisasContext *s); -/* i386 arith/logic operations */ -enum { - OP_ADDL, - OP_ORL, - OP_ADCL, - OP_SBBL, - OP_ANDL, - OP_SUBL, - OP_XORL, - OP_CMPL, -}; - /* i386 shift ops */ enum { OP_ROL, @@ -322,7 +332,7 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_POPCNT] = USES_CC_SRC, }; -static void set_cc_op(DisasContext *s, CCOp op) +static void set_cc_op_1(DisasContext *s, CCOp op, bool dirty) { int dead; @@ -345,20 +355,27 @@ static void set_cc_op(DisasContext *s, CCOp op) tcg_gen_discard_tl(s->cc_srcT); } - if (op == CC_OP_DYNAMIC) { - /* The DYNAMIC setting is translator only, and should never be - stored. Thus we always consider it clean. */ - s->cc_op_dirty = false; - } else { - /* Discard any computed CC_OP value (see shifts). */ - if (s->cc_op == CC_OP_DYNAMIC) { - tcg_gen_discard_i32(cpu_cc_op); - } - s->cc_op_dirty = true; + if (dirty && s->cc_op == CC_OP_DYNAMIC) { + tcg_gen_discard_i32(cpu_cc_op); } + s->cc_op_dirty = dirty; s->cc_op = op; } +static void set_cc_op(DisasContext *s, CCOp op) +{ + /* + * The DYNAMIC setting is translator only, everything else + * will be spilled later. + */ + set_cc_op_1(s, op, op != CC_OP_DYNAMIC); +} + +static void assume_cc_op(DisasContext *s, CCOp op) +{ + set_cc_op_1(s, op, false); +} + static void gen_update_cc_op(DisasContext *s) { if (s->cc_op_dirty) { @@ -422,16 +439,6 @@ static inline MemOp mo_stacksize(DisasContext *s) return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16; } -/* Select only size 64 else 32. Used for SSE operand sizes. 
*/ -static inline MemOp mo_64_32(MemOp ot) -{ -#ifdef TARGET_X86_64 - return ot == MO_64 ? MO_64 : MO_32; -#else - return MO_32; -#endif -} - /* Select size 8 if lsb of B is clear, else OT. Used for decoding byte vs word opcodes. */ static inline MemOp mo_b_d(int b, MemOp ot) @@ -439,13 +446,6 @@ static inline MemOp mo_b_d(int b, MemOp ot) return b & 1 ? ot : MO_8; } -/* Select size 8 if lsb of B is clear, else OT capped at 32. - Used for decoding operand size of port opcodes. */ -static inline MemOp mo_b_d32(int b, MemOp ot) -{ - return b & 1 ? (ot == MO_16 ? MO_16 : MO_32) : MO_8; -} - /* Compute the result of writing t0 to the OT-sized register REG. * * If DEST is NULL, store the result into the register and return the @@ -562,19 +562,6 @@ static void gen_update_eip_cur(DisasContext *s) s->pc_save = s->base.pc_next; } -static void gen_update_eip_next(DisasContext *s) -{ - assert(s->pc_save != -1); - if (tb_cflags(s->base.tb) & CF_PCREL) { - tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); - } else if (CODE64(s)) { - tcg_gen_movi_tl(cpu_eip, s->pc); - } else { - tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base)); - } - s->pc_save = s->pc; -} - static int cur_insn_len(DisasContext *s) { return s->pc - s->base.pc_next; @@ -693,20 +680,20 @@ static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0, } } -static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, +static void gen_lea_v_seg(DisasContext *s, TCGv a0, int def_seg, int ovr_seg) { - gen_lea_v_seg_dest(s, aflag, s->A0, a0, def_seg, ovr_seg); + gen_lea_v_seg_dest(s, s->aflag, s->A0, a0, def_seg, ovr_seg); } static inline void gen_string_movl_A0_ESI(DisasContext *s) { - gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override); + gen_lea_v_seg(s, cpu_regs[R_ESI], R_DS, s->override); } static inline void gen_string_movl_A0_EDI(DisasContext *s) { - gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1); + gen_lea_v_seg(s, cpu_regs[R_EDI], R_ES, -1); } static inline TCGv gen_compute_Dshift(DisasContext *s, MemOp ot) @@ -848,25 +835,6 @@ static void gen_op_update2_cc(DisasContext *s) tcg_gen_mov_tl(cpu_cc_dst, s->T0); } -static void gen_op_update3_cc(DisasContext *s, TCGv reg) -{ - tcg_gen_mov_tl(cpu_cc_src2, reg); - tcg_gen_mov_tl(cpu_cc_src, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -} - -static inline void gen_op_testl_T0_T1_cc(DisasContext *s) -{ - tcg_gen_and_tl(cpu_cc_dst, s->T0, s->T1); -} - -static void gen_op_update_neg_cc(DisasContext *s) -{ - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - tcg_gen_neg_tl(cpu_cc_src, s->T0); - tcg_gen_movi_tl(s->cc_srcT, 0); -} - /* compute all eflags to reg */ static void gen_mov_eflags(DisasContext *s, TCGv reg) { @@ -923,94 +891,100 @@ typedef struct CCPrepare { TCGv reg; TCGv reg2; target_ulong imm; - target_ulong mask; bool use_reg2; bool no_setcond; } CCPrepare; -/* compute eflags.C to reg */ +static CCPrepare gen_prepare_sign_nz(TCGv src, MemOp size) +{ + if (size == MO_TL) { + return (CCPrepare) { .cond = TCG_COND_LT, .reg = src }; + } else { + return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = src, + .imm = 1ull << ((8 << size) - 1) }; + } +} + +/* compute eflags.C, trying to store it in reg if not NULL */ static CCPrepare gen_prepare_eflags_c(DisasContext *s, TCGv reg) { - TCGv t0, t1; - int size, shift; + MemOp size; switch (s->cc_op) { case CC_OP_SUBB ... 
CC_OP_SUBQ: /* (DATA_TYPE)CC_SRCT < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_SUBB; - t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false); - /* If no temporary was used, be careful not to alias t1 and t0. */ - t0 = t1 == cpu_cc_src ? s->tmp0 : reg; - tcg_gen_mov_tl(t0, s->cc_srcT); - gen_extu(size, t0); - goto add_sub; + gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false); + gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + return (CCPrepare) { .cond = TCG_COND_LTU, .reg = s->cc_srcT, + .reg2 = cpu_cc_src, .use_reg2 = true }; case CC_OP_ADDB ... CC_OP_ADDQ: /* (DATA_TYPE)CC_DST < (DATA_TYPE)CC_SRC */ size = s->cc_op - CC_OP_ADDB; - t1 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false); - t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); - add_sub: - return (CCPrepare) { .cond = TCG_COND_LTU, .reg = t0, - .reg2 = t1, .mask = -1, .use_reg2 = true }; + gen_ext_tl(cpu_cc_dst, cpu_cc_dst, size, false); + gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + return (CCPrepare) { .cond = TCG_COND_LTU, .reg = cpu_cc_dst, + .reg2 = cpu_cc_src, .use_reg2 = true }; case CC_OP_LOGICB ... CC_OP_LOGICQ: case CC_OP_CLR: case CC_OP_POPCNT: - return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + return (CCPrepare) { .cond = TCG_COND_NEVER }; case CC_OP_INCB ... CC_OP_INCQ: case CC_OP_DECB ... CC_OP_DECQ: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = -1, .no_setcond = true }; + .no_setcond = true }; case CC_OP_SHLB ... CC_OP_SHLQ: /* (CC_SRC >> (DATA_BITS - 1)) & 1 */ size = s->cc_op - CC_OP_SHLB; - shift = (8 << size) - 1; - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = (target_ulong)1 << shift }; + return gen_prepare_sign_nz(cpu_cc_src, size); case CC_OP_MULB ... CC_OP_MULQ: return (CCPrepare) { .cond = TCG_COND_NE, - .reg = cpu_cc_src, .mask = -1 }; + .reg = cpu_cc_src }; case CC_OP_BMILGB ... CC_OP_BMILGQ: size = s->cc_op - CC_OP_BMILGB; - t0 = gen_ext_tl(reg, cpu_cc_src, size, false); - return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src }; case CC_OP_ADCX: case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_dst, - .mask = -1, .no_setcond = true }; + .no_setcond = true }; case CC_OP_EFLAGS: case CC_OP_SARB ... CC_OP_SARQ: /* CC_SRC & 1 */ - return (CCPrepare) { .cond = TCG_COND_NE, - .reg = cpu_cc_src, .mask = CC_C }; + return (CCPrepare) { .cond = TCG_COND_TSTNE, + .reg = cpu_cc_src, .imm = CC_C }; default: /* The need to compute only C from CC_OP_DYNAMIC is important in efficiently implementing e.g. INC at the start of a TB. 
*/ gen_update_cc_op(s); + if (!reg) { + reg = tcg_temp_new(); + } gen_helper_cc_compute_c(reg, cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_op); return (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = -1, .no_setcond = true }; + .no_setcond = true }; } } -/* compute eflags.P to reg */ +/* compute eflags.P, trying to store it in reg if not NULL */ static CCPrepare gen_prepare_eflags_p(DisasContext *s, TCGv reg) { gen_compute_eflags(s); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_P }; + return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, + .imm = CC_P }; } -/* compute eflags.S to reg */ +/* compute eflags.S, trying to store it in reg if not NULL */ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) { switch (s->cc_op) { @@ -1021,42 +995,40 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) case CC_OP_ADCX: case CC_OP_ADOX: case CC_OP_ADCOX: - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_S }; + return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, + .imm = CC_S }; case CC_OP_CLR: case CC_OP_POPCNT: - return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + return (CCPrepare) { .cond = TCG_COND_NEVER }; default: { MemOp size = (s->cc_op - CC_OP_ADDB) & 3; - TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true); - return (CCPrepare) { .cond = TCG_COND_LT, .reg = t0, .mask = -1 }; + return gen_prepare_sign_nz(cpu_cc_dst, size); } } } -/* compute eflags.O to reg */ +/* compute eflags.O, trying to store it in reg if not NULL */ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) { switch (s->cc_op) { case CC_OP_ADOX: case CC_OP_ADCOX: return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src2, - .mask = -1, .no_setcond = true }; + .no_setcond = true }; case CC_OP_CLR: case CC_OP_POPCNT: - return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + return (CCPrepare) { .cond = TCG_COND_NEVER }; case CC_OP_MULB ... 
CC_OP_MULQ: - return (CCPrepare) { .cond = TCG_COND_NE, - .reg = cpu_cc_src, .mask = -1 }; + return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src }; default: gen_compute_eflags(s); - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_O }; + return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, + .imm = CC_O }; } } -/* compute eflags.Z to reg */ +/* compute eflags.Z, trying to store it in reg if not NULL */ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) { switch (s->cc_op) { @@ -1067,30 +1039,33 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) case CC_OP_ADCX: case CC_OP_ADOX: case CC_OP_ADCOX: - return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_Z }; + return (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, + .imm = CC_Z }; case CC_OP_CLR: - return (CCPrepare) { .cond = TCG_COND_ALWAYS, .mask = -1 }; + return (CCPrepare) { .cond = TCG_COND_ALWAYS }; case CC_OP_POPCNT: - return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src, - .mask = -1 }; + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_src }; default: { MemOp size = (s->cc_op - CC_OP_ADDB) & 3; - TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); - return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; + if (size == MO_TL) { + return (CCPrepare) { .cond = TCG_COND_EQ, .reg = cpu_cc_dst }; + } else { + return (CCPrepare) { .cond = TCG_COND_TSTEQ, .reg = cpu_cc_dst, + .imm = (1ull << (8 << size)) - 1 }; + } } } } -/* perform a conditional store into register 'reg' according to jump opcode - value 'b'. In the fast case, T0 is guaranteed not to be used. */ +/* return how to compute jump opcode 'b'. 'reg' can be clobbered + * if needed; it may be used for CCPrepare.reg if that will + * provide more freedom in the translation of a subsequent setcond. 
*/ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) { int inv, jcc_op, cond; MemOp size; CCPrepare cc; - TCGv t0; inv = b & 1; jcc_op = (b >> 1) & 7; @@ -1101,24 +1076,21 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) size = s->cc_op - CC_OP_SUBB; switch (jcc_op) { case JCC_BE: - tcg_gen_mov_tl(s->tmp4, s->cc_srcT); - gen_extu(size, s->tmp4); - t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, false); - cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->tmp4, - .reg2 = t0, .mask = -1, .use_reg2 = true }; + gen_ext_tl(s->cc_srcT, s->cc_srcT, size, false); + gen_ext_tl(cpu_cc_src, cpu_cc_src, size, false); + cc = (CCPrepare) { .cond = TCG_COND_LEU, .reg = s->cc_srcT, + .reg2 = cpu_cc_src, .use_reg2 = true }; break; - case JCC_L: cond = TCG_COND_LT; goto fast_jcc_l; case JCC_LE: cond = TCG_COND_LE; fast_jcc_l: - tcg_gen_mov_tl(s->tmp4, s->cc_srcT); - gen_exts(size, s->tmp4); - t0 = gen_ext_tl(s->tmp0, cpu_cc_src, size, true); - cc = (CCPrepare) { .cond = cond, .reg = s->tmp4, - .reg2 = t0, .mask = -1, .use_reg2 = true }; + gen_ext_tl(s->cc_srcT, s->cc_srcT, size, true); + gen_ext_tl(cpu_cc_src, cpu_cc_src, size, true); + cc = (CCPrepare) { .cond = cond, .reg = s->cc_srcT, + .reg2 = cpu_cc_src, .use_reg2 = true }; break; default: @@ -1141,8 +1113,8 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) break; case JCC_BE: gen_compute_eflags(s); - cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, - .mask = CC_Z | CC_C }; + cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = cpu_cc_src, + .imm = CC_Z | CC_C }; break; case JCC_S: cc = gen_prepare_eflags_s(s, reg); @@ -1152,22 +1124,22 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) break; case JCC_L: gen_compute_eflags(s); - if (reg == cpu_cc_src) { - reg = s->tmp0; + if (!reg || reg == cpu_cc_src) { + reg = tcg_temp_new(); } tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); - cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_O }; + cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg, + .imm = CC_O }; break; default: case JCC_LE: gen_compute_eflags(s); - if (reg == cpu_cc_src) { - reg = s->tmp0; + if (!reg || reg == cpu_cc_src) { + reg = tcg_temp_new(); } tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); - cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_O | CC_Z }; + cc = (CCPrepare) { .cond = TCG_COND_TSTNE, .reg = reg, + .imm = CC_O | CC_Z }; break; } break; @@ -1192,16 +1164,6 @@ static void gen_setcc1(DisasContext *s, int b, TCGv reg) return; } - if (cc.cond == TCG_COND_NE && !cc.use_reg2 && cc.imm == 0 && - cc.mask != 0 && (cc.mask & (cc.mask - 1)) == 0) { - tcg_gen_shri_tl(reg, cc.reg, ctztl(cc.mask)); - tcg_gen_andi_tl(reg, reg, 1); - return; - } - if (cc.mask != -1) { - tcg_gen_andi_tl(reg, cc.reg, cc.mask); - cc.reg = reg; - } if (cc.use_reg2) { tcg_gen_setcond_tl(cc.cond, reg, cc.reg, cc.reg2); } else { @@ -1218,12 +1180,8 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg) value 'b'. In the fast case, T0 is guaranteed not to be used. 
*/ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1) { - CCPrepare cc = gen_prepare_cc(s, b, s->T0); + CCPrepare cc = gen_prepare_cc(s, b, NULL); - if (cc.mask != -1) { - tcg_gen_andi_tl(s->T0, cc.reg, cc.mask); - cc.reg = s->T0; - } if (cc.use_reg2) { tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); } else { @@ -1233,17 +1191,13 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1) /* Generate a conditional jump to label 'l1' according to jump opcode value 'b'. In the fast case, T0 is guaranteed not to be used. - A translation block must end soon. */ + One or both of the branches will call gen_jmp_rel, so ensure + cc_op is clean. */ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) { - CCPrepare cc = gen_prepare_cc(s, b, s->T0); + CCPrepare cc = gen_prepare_cc(s, b, NULL); gen_update_cc_op(s); - if (cc.mask != -1) { - tcg_gen_andi_tl(s->T0, cc.reg, cc.mask); - cc.reg = s->T0; - } - set_cc_op(s, CC_OP_DYNAMIC); if (cc.use_reg2) { tcg_gen_brcond_tl(cc.cond, cc.reg, cc.reg2, l1); } else { @@ -1252,11 +1206,15 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) } /* XXX: does not work with gdbstub "ice" single step - not a - serious problem */ + serious problem. The caller can jump to the returned label + to stop the REP but, if the flags have changed, it has to call + gen_update_cc_op before doing so. */ static TCGLabel *gen_jz_ecx_string(DisasContext *s) { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); + + gen_update_cc_op(s); gen_op_jnz_ecx(s, l1); gen_set_label(l2); gen_jmp_rel_csize(s, 0, 1); @@ -1298,7 +1256,11 @@ static void gen_cmps(DisasContext *s, MemOp ot) gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); gen_string_movl_A0_ESI(s); - gen_op(s, OP_CMPL, ot, OR_TMP0); + gen_op_ld_v(s, ot, s->T0, s->A0); + tcg_gen_mov_tl(cpu_cc_src, s->T1); + tcg_gen_mov_tl(s->cc_srcT, s->T0); + tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1); + set_cc_op(s, CC_OP_SUBB + ot); dshift = gen_compute_Dshift(s, ot); gen_op_add_reg(s, s->aflag, R_ESI, dshift); @@ -1352,7 +1314,6 @@ static void gen_repz(DisasContext *s, MemOp ot, void (*fn)(DisasContext *s, MemOp ot)) { TCGLabel *l2; - gen_update_cc_op(s); l2 = gen_jz_ecx_string(s); fn(s, ot); gen_op_add_reg_im(s, s->aflag, R_ECX, -1); @@ -1366,38 +1327,27 @@ static void gen_repz(DisasContext *s, MemOp ot, gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } -#define GEN_REPZ(op) \ - static inline void gen_repz_ ## op(DisasContext *s, MemOp ot) \ - { gen_repz(s, ot, gen_##op); } - -static void gen_repz2(DisasContext *s, MemOp ot, int nz, - void (*fn)(DisasContext *s, MemOp ot)) +static void gen_repz_nz(DisasContext *s, MemOp ot, + void (*fn)(DisasContext *s, MemOp ot)) { TCGLabel *l2; - gen_update_cc_op(s); + int nz = (s->prefix & PREFIX_REPNZ) ? 1 : 0; + l2 = gen_jz_ecx_string(s); fn(s, ot); gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_update_cc_op(s); gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); if (s->repz_opt) { gen_op_jz_ecx(s, l2); } + /* + * Only one iteration is done at a time, so the translation + * block ends unconditionally after this instruction and there + * is no control flow junction - no need to set CC_OP_DYNAMIC. 
+ */ gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } -#define GEN_REPZ2(op) \ - static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, int nz) \ - { gen_repz2(s, ot, nz, gen_##op); } - -GEN_REPZ(movs) -GEN_REPZ(stos) -GEN_REPZ(lods) -GEN_REPZ(ins) -GEN_REPZ(outs) -GEN_REPZ2(scas) -GEN_REPZ2(cmps) - static void gen_helper_fp_arith_ST0_FT0(int op) { switch (op) { @@ -1485,165 +1435,6 @@ static bool check_cpl0(DisasContext *s) return false; } -/* If vm86, check for iopl == 3; if not, raise #GP and return false. */ -static bool check_vm86_iopl(DisasContext *s) -{ - if (!VM86(s) || IOPL(s) == 3) { - return true; - } - gen_exception_gpf(s); - return false; -} - -/* Check for iopl allowing access; if not, raise #GP and return false. */ -static bool check_iopl(DisasContext *s) -{ - if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) { - return true; - } - gen_exception_gpf(s); - return false; -} - -/* if d == OR_TMP0, it means memory operand (address in A0) */ -static void gen_op(DisasContext *s1, int op, MemOp ot, int d) -{ - /* Invalid lock prefix when destination is not memory or OP_CMPL. */ - if ((d != OR_TMP0 || op == OP_CMPL) && s1->prefix & PREFIX_LOCK) { - gen_illegal_opcode(s1); - return; - } - - if (d != OR_TMP0) { - gen_op_mov_v_reg(s1, ot, s1->T0, d); - } else if (!(s1->prefix & PREFIX_LOCK)) { - gen_op_ld_v(s1, ot, s1->T0, s1->A0); - } - switch(op) { - case OP_ADCL: - gen_compute_eflags_c(s1, s1->tmp4); - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_add_tl(s1->T0, s1->tmp4, s1->T1); - tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, - s1->mem_index, ot | MO_LE); - } else { - tcg_gen_add_tl(s1->T0, s1->T0, s1->T1); - tcg_gen_add_tl(s1->T0, s1->T0, s1->tmp4); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update3_cc(s1, s1->tmp4); - set_cc_op(s1, CC_OP_ADCB + ot); - break; - case OP_SBBL: - gen_compute_eflags_c(s1, s1->tmp4); - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_add_tl(s1->T0, s1->T1, s1->tmp4); - tcg_gen_neg_tl(s1->T0, s1->T0); - tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, - s1->mem_index, ot | MO_LE); - } else { - tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1); - tcg_gen_sub_tl(s1->T0, s1->T0, s1->tmp4); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update3_cc(s1, s1->tmp4); - set_cc_op(s1, CC_OP_SBBB + ot); - break; - case OP_ADDL: - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T1, - s1->mem_index, ot | MO_LE); - } else { - tcg_gen_add_tl(s1->T0, s1->T0, s1->T1); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update2_cc(s1); - set_cc_op(s1, CC_OP_ADDB + ot); - break; - case OP_SUBL: - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_neg_tl(s1->T0, s1->T1); - tcg_gen_atomic_fetch_add_tl(s1->cc_srcT, s1->A0, s1->T0, - s1->mem_index, ot | MO_LE); - tcg_gen_sub_tl(s1->T0, s1->cc_srcT, s1->T1); - } else { - tcg_gen_mov_tl(s1->cc_srcT, s1->T0); - tcg_gen_sub_tl(s1->T0, s1->T0, s1->T1); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update2_cc(s1); - set_cc_op(s1, CC_OP_SUBB + ot); - break; - default: - case OP_ANDL: - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_and_fetch_tl(s1->T0, s1->A0, s1->T1, - s1->mem_index, ot | MO_LE); - } else { - tcg_gen_and_tl(s1->T0, s1->T0, s1->T1); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update1_cc(s1); - set_cc_op(s1, CC_OP_LOGICB + ot); - break; - case OP_ORL: - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_or_fetch_tl(s1->T0, s1->A0, s1->T1, - s1->mem_index, ot | MO_LE); - } else { - tcg_gen_or_tl(s1->T0, s1->T0, s1->T1); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update1_cc(s1); - set_cc_op(s1, 
CC_OP_LOGICB + ot); - break; - case OP_XORL: - if (s1->prefix & PREFIX_LOCK) { - tcg_gen_atomic_xor_fetch_tl(s1->T0, s1->A0, s1->T1, - s1->mem_index, ot | MO_LE); - } else { - tcg_gen_xor_tl(s1->T0, s1->T0, s1->T1); - gen_op_st_rm_T0_A0(s1, ot, d); - } - gen_op_update1_cc(s1); - set_cc_op(s1, CC_OP_LOGICB + ot); - break; - case OP_CMPL: - tcg_gen_mov_tl(cpu_cc_src, s1->T1); - tcg_gen_mov_tl(s1->cc_srcT, s1->T0); - tcg_gen_sub_tl(cpu_cc_dst, s1->T0, s1->T1); - set_cc_op(s1, CC_OP_SUBB + ot); - break; - } -} - -/* if d == OR_TMP0, it means memory operand (address in A0) */ -static void gen_inc(DisasContext *s1, MemOp ot, int d, int c) -{ - if (s1->prefix & PREFIX_LOCK) { - if (d != OR_TMP0) { - /* Lock prefix when destination is not memory */ - gen_illegal_opcode(s1); - return; - } - tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1); - tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, - s1->mem_index, ot | MO_LE); - } else { - if (d != OR_TMP0) { - gen_op_mov_v_reg(s1, ot, s1->T0, d); - } else { - gen_op_ld_v(s1, ot, s1->T0, s1->A0); - } - tcg_gen_addi_tl(s1->T0, s1->T0, (c > 0 ? 1 : -1)); - gen_op_st_rm_T0_A0(s1, ot, d); - } - - gen_compute_eflags_c(s1, cpu_cc_src); - tcg_gen_mov_tl(cpu_cc_dst, s1->T0); - set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot); -} - static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result, TCGv shm1, TCGv count, bool is_right) { @@ -1686,298 +1477,6 @@ static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result, set_cc_op(s, CC_OP_DYNAMIC); } -static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1, - int is_right, int is_arith) -{ - target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f); - - /* load */ - if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, op1); - } - - tcg_gen_andi_tl(s->T1, s->T1, mask); - tcg_gen_subi_tl(s->tmp0, s->T1, 1); - - if (is_right) { - if (is_arith) { - gen_exts(ot, s->T0); - tcg_gen_sar_tl(s->tmp0, s->T0, s->tmp0); - tcg_gen_sar_tl(s->T0, s->T0, s->T1); - } else { - gen_extu(ot, s->T0); - tcg_gen_shr_tl(s->tmp0, s->T0, s->tmp0); - tcg_gen_shr_tl(s->T0, s->T0, s->T1); - } - } else { - tcg_gen_shl_tl(s->tmp0, s->T0, s->tmp0); - tcg_gen_shl_tl(s->T0, s->T0, s->T1); - } - - /* store */ - gen_op_st_rm_T0_A0(s, ot, op1); - - gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right); -} - -static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2, - int is_right, int is_arith) -{ - int mask = (ot == MO_64 ? 0x3f : 0x1f); - - /* load */ - if (op1 == OR_TMP0) - gen_op_ld_v(s, ot, s->T0, s->A0); - else - gen_op_mov_v_reg(s, ot, s->T0, op1); - - op2 &= mask; - if (op2 != 0) { - if (is_right) { - if (is_arith) { - gen_exts(ot, s->T0); - tcg_gen_sari_tl(s->tmp4, s->T0, op2 - 1); - tcg_gen_sari_tl(s->T0, s->T0, op2); - } else { - gen_extu(ot, s->T0); - tcg_gen_shri_tl(s->tmp4, s->T0, op2 - 1); - tcg_gen_shri_tl(s->T0, s->T0, op2); - } - } else { - tcg_gen_shli_tl(s->tmp4, s->T0, op2 - 1); - tcg_gen_shli_tl(s->T0, s->T0, op2); - } - } - - /* store */ - gen_op_st_rm_T0_A0(s, ot, op1); - - /* update eflags if non zero shift */ - if (op2 != 0) { - tcg_gen_mov_tl(cpu_cc_src, s->tmp4); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - set_cc_op(s, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); - } -} - -static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right) -{ - target_ulong mask = (ot == MO_64 ? 
0x3f : 0x1f); - TCGv_i32 t0, t1; - - /* load */ - if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, op1); - } - - tcg_gen_andi_tl(s->T1, s->T1, mask); - - switch (ot) { - case MO_8: - /* Replicate the 8-bit input so that a 32-bit rotate works. */ - tcg_gen_ext8u_tl(s->T0, s->T0); - tcg_gen_muli_tl(s->T0, s->T0, 0x01010101); - goto do_long; - case MO_16: - /* Replicate the 16-bit input so that a 32-bit rotate works. */ - tcg_gen_deposit_tl(s->T0, s->T0, s->T0, 16, 16); - goto do_long; - do_long: -#ifdef TARGET_X86_64 - case MO_32: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - if (is_right) { - tcg_gen_rotr_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); - } else { - tcg_gen_rotl_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); - } - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - break; -#endif - default: - if (is_right) { - tcg_gen_rotr_tl(s->T0, s->T0, s->T1); - } else { - tcg_gen_rotl_tl(s->T0, s->T0, s->T1); - } - break; - } - - /* store */ - gen_op_st_rm_T0_A0(s, ot, op1); - - /* We'll need the flags computed into CC_SRC. */ - gen_compute_eflags(s); - - /* The value that was "rotated out" is now present at the other end - of the word. Compute C into CC_DST and O into CC_SRC2. Note that - since we've computed the flags into CC_SRC, these variables are - currently dead. */ - if (is_right) { - tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1); - tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask); - tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1); - } else { - tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask); - tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1); - } - tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); - tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); - - /* Now conditionally store the new CC_OP value. If the shift count - is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live. - Otherwise reuse CC_OP_ADCOX which have the C and O flags split out - exactly as we computed above. */ - t0 = tcg_constant_i32(0); - t1 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(t1, s->T1); - tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX); - tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS); - tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0, - s->tmp2_i32, s->tmp3_i32); - - /* The CC_OP value is no longer predictable. */ - set_cc_op(s, CC_OP_DYNAMIC); -} - -static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2, - int is_right) -{ - int mask = (ot == MO_64 ? 0x3f : 0x1f); - int shift; - - /* load */ - if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, op1); - } - - op2 &= mask; - if (op2 != 0) { - switch (ot) { -#ifdef TARGET_X86_64 - case MO_32: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - if (is_right) { - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, op2); - } else { - tcg_gen_rotli_i32(s->tmp2_i32, s->tmp2_i32, op2); - } - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - break; -#endif - default: - if (is_right) { - tcg_gen_rotri_tl(s->T0, s->T0, op2); - } else { - tcg_gen_rotli_tl(s->T0, s->T0, op2); - } - break; - case MO_8: - mask = 7; - goto do_shifts; - case MO_16: - mask = 15; - do_shifts: - shift = op2 & mask; - if (is_right) { - shift = mask + 1 - shift; - } - gen_extu(ot, s->T0); - tcg_gen_shli_tl(s->tmp0, s->T0, shift); - tcg_gen_shri_tl(s->T0, s->T0, mask + 1 - shift); - tcg_gen_or_tl(s->T0, s->T0, s->tmp0); - break; - } - } - - /* store */ - gen_op_st_rm_T0_A0(s, ot, op1); - - if (op2 != 0) { - /* Compute the flags into CC_SRC. 
*/ - gen_compute_eflags(s); - - /* The value that was "rotated out" is now present at the other end - of the word. Compute C into CC_DST and O into CC_SRC2. Note that - since we've computed the flags into CC_SRC, these variables are - currently dead. */ - if (is_right) { - tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask - 1); - tcg_gen_shri_tl(cpu_cc_dst, s->T0, mask); - tcg_gen_andi_tl(cpu_cc_dst, cpu_cc_dst, 1); - } else { - tcg_gen_shri_tl(cpu_cc_src2, s->T0, mask); - tcg_gen_andi_tl(cpu_cc_dst, s->T0, 1); - } - tcg_gen_andi_tl(cpu_cc_src2, cpu_cc_src2, 1); - tcg_gen_xor_tl(cpu_cc_src2, cpu_cc_src2, cpu_cc_dst); - set_cc_op(s, CC_OP_ADCOX); - } -} - -/* XXX: add faster immediate = 1 case */ -static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1, - int is_right) -{ - gen_compute_eflags(s); - assert(s->cc_op == CC_OP_EFLAGS); - - /* load */ - if (op1 == OR_TMP0) - gen_op_ld_v(s, ot, s->T0, s->A0); - else - gen_op_mov_v_reg(s, ot, s->T0, op1); - - if (is_right) { - switch (ot) { - case MO_8: - gen_helper_rcrb(s->T0, tcg_env, s->T0, s->T1); - break; - case MO_16: - gen_helper_rcrw(s->T0, tcg_env, s->T0, s->T1); - break; - case MO_32: - gen_helper_rcrl(s->T0, tcg_env, s->T0, s->T1); - break; -#ifdef TARGET_X86_64 - case MO_64: - gen_helper_rcrq(s->T0, tcg_env, s->T0, s->T1); - break; -#endif - default: - g_assert_not_reached(); - } - } else { - switch (ot) { - case MO_8: - gen_helper_rclb(s->T0, tcg_env, s->T0, s->T1); - break; - case MO_16: - gen_helper_rclw(s->T0, tcg_env, s->T0, s->T1); - break; - case MO_32: - gen_helper_rcll(s->T0, tcg_env, s->T0, s->T1); - break; -#ifdef TARGET_X86_64 - case MO_64: - gen_helper_rclq(s->T0, tcg_env, s->T0, s->T1); - break; -#endif - default: - g_assert_not_reached(); - } - } - /* store */ - gen_op_st_rm_T0_A0(s, ot, op1); -} - /* XXX: add faster immediate case */ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1, bool is_right, TCGv count_in) @@ -2062,63 +1561,6 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1, gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right); } -static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s) -{ - if (s != OR_TMP1) - gen_op_mov_v_reg(s1, ot, s1->T1, s); - switch(op) { - case OP_ROL: - gen_rot_rm_T1(s1, ot, d, 0); - break; - case OP_ROR: - gen_rot_rm_T1(s1, ot, d, 1); - break; - case OP_SHL: - case OP_SHL1: - gen_shift_rm_T1(s1, ot, d, 0, 0); - break; - case OP_SHR: - gen_shift_rm_T1(s1, ot, d, 1, 0); - break; - case OP_SAR: - gen_shift_rm_T1(s1, ot, d, 1, 1); - break; - case OP_RCL: - gen_rotc_rm_T1(s1, ot, d, 0); - break; - case OP_RCR: - gen_rotc_rm_T1(s1, ot, d, 1); - break; - } -} - -static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c) -{ - switch(op) { - case OP_ROL: - gen_rot_rm_im(s1, ot, d, c, 0); - break; - case OP_ROR: - gen_rot_rm_im(s1, ot, d, c, 1); - break; - case OP_SHL: - case OP_SHL1: - gen_shift_rm_im(s1, ot, d, c, 0, 0); - break; - case OP_SHR: - gen_shift_rm_im(s1, ot, d, c, 1, 0); - break; - case OP_SAR: - gen_shift_rm_im(s1, ot, d, c, 1, 1); - break; - default: - /* currently not optimized */ - tcg_gen_movi_tl(s1->T1, c); - gen_shift(s1, op, ot, d, OR_TMP1); - break; - } -} - #define X86_MAX_INSN_LENGTH 15 static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes) @@ -2139,9 +1581,8 @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes) * This can happen even if the operand is only one byte long! 
*/ if (((s->pc - 1) ^ (pc - 1)) & TARGET_PAGE_MASK) { - volatile uint8_t unused = - cpu_ldub_code(env, (s->pc - 1) & TARGET_PAGE_MASK); - (void) unused; + (void)translator_ldub(env, &s->base, + (s->pc - 1) & TARGET_PAGE_MASK); } siglongjmp(s->jmpbuf, 1); } @@ -2154,11 +1595,6 @@ static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s) return translator_ldub(env, &s->base, advance_pc(env, s, 1)); } -static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s) -{ - return translator_lduw(env, &s->base, advance_pc(env, s, 2)); -} - static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s) { return translator_lduw(env, &s->base, advance_pc(env, s, 2)); @@ -2348,7 +1784,7 @@ static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { AddressParts a = gen_lea_modrm_0(env, s, modrm); TCGv ea = gen_lea_modrm_1(s, a, false); - gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); + gen_lea_v_seg(s, ea, a.def_seg, s->override); } static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) @@ -2372,42 +1808,33 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, gen_helper_bndck(tcg_env, s->tmp2_i32); } -/* used for LEA and MOV AX, mem */ -static void gen_add_A0_ds_seg(DisasContext *s) +/* generate modrm load of memory or register. */ +static void gen_ld_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) { - gen_lea_v_seg(s, s->aflag, s->A0, R_DS, s->override); + int mod, rm; + + mod = (modrm >> 6) & 3; + rm = (modrm & 7) | REX_B(s); + if (mod == 3) { + gen_op_mov_v_reg(s, ot, s->T0, rm); + } else { + gen_lea_modrm(env, s, modrm); + gen_op_ld_v(s, ot, s->T0, s->A0); + } } -/* generate modrm memory load or store of 'reg'. TMP0 is used if reg == - OR_TMP0 */ -static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, - MemOp ot, int reg, int is_store) +/* generate modrm store of memory or register. 
*/ +static void gen_st_modrm(CPUX86State *env, DisasContext *s, int modrm, MemOp ot) { int mod, rm; mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); if (mod == 3) { - if (is_store) { - if (reg != OR_TMP0) - gen_op_mov_v_reg(s, ot, s->T0, reg); - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, rm); - if (reg != OR_TMP0) - gen_op_mov_reg_v(s, ot, reg, s->T0); - } + gen_op_mov_reg_v(s, ot, rm, s->T0); } else { gen_lea_modrm(env, s, modrm); - if (is_store) { - if (reg != OR_TMP0) - gen_op_mov_v_reg(s, ot, s->T0, reg); - gen_op_st_v(s, ot, s->T0, s->A0); - } else { - gen_op_ld_v(s, ot, s->T0, s->A0); - if (reg != OR_TMP0) - gen_op_mov_reg_v(s, ot, reg, s->T0); - } + gen_op_st_v(s, ot, s->T0, s->A0); } } @@ -2484,13 +1911,16 @@ static target_long insn_get_signed(CPUX86State *env, DisasContext *s, MemOp ot) return ret; } -static inline int insn_const_size(MemOp ot) +static void gen_conditional_jump_labels(DisasContext *s, target_long diff, + TCGLabel *not_taken, TCGLabel *taken) { - if (ot <= MO_32) { - return 1 << ot; - } else { - return 4; + if (not_taken) { + gen_set_label(not_taken); } + gen_jmp_rel_csize(s, 0, 1); + + gen_set_label(taken); + gen_jmp_rel(s, s->dflag, diff, 0); } static void gen_jcc(DisasContext *s, int b, int diff) @@ -2498,20 +1928,13 @@ static void gen_jcc(DisasContext *s, int b, int diff) TCGLabel *l1 = gen_new_label(); gen_jcc1(s, b, l1); - gen_jmp_rel_csize(s, 0, 1); - gen_set_label(l1); - gen_jmp_rel(s, s->dflag, diff, 0); + gen_conditional_jump_labels(s, diff, NULL, l1); } static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src) { - CCPrepare cc = gen_prepare_cc(s, b, s->T1); + CCPrepare cc = gen_prepare_cc(s, b, NULL); - if (cc.mask != -1) { - TCGv t0 = tcg_temp_new(); - tcg_gen_andi_tl(t0, cc.reg, cc.mask); - cc.reg = t0; - } if (!cc.use_reg2) { cc.reg2 = tcg_constant_tl(cc.imm); } @@ -2519,26 +1942,21 @@ static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src) tcg_gen_movcond_tl(cc.cond, dest, cc.reg, cc.reg2, src, dest); } -static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg) +static void gen_op_movl_seg_real(DisasContext *s, X86Seg seg_reg, TCGv seg) { - tcg_gen_ld32u_tl(s->T0, tcg_env, - offsetof(CPUX86State,segs[seg_reg].selector)); -} - -static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg) -{ - tcg_gen_ext16u_tl(s->T0, s->T0); - tcg_gen_st32_tl(s->T0, tcg_env, + TCGv selector = tcg_temp_new(); + tcg_gen_ext16u_tl(selector, seg); + tcg_gen_st32_tl(selector, tcg_env, offsetof(CPUX86State,segs[seg_reg].selector)); - tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4); + tcg_gen_shli_tl(cpu_seg_base[seg_reg], selector, 4); } -/* move T0 to seg_reg and compute if the CPU state may change. Never +/* move SRC to seg_reg and compute if the CPU state may change. Never call this function with seg_reg == R_CS */ -static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg) +static void gen_movl_seg(DisasContext *s, X86Seg seg_reg, TCGv src) { if (PE(s) && !VM86(s)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp2_i32, src); gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32); /* abort translation because the addseg value may change or because ss32 may change. 
For R_SS, translation must always @@ -2550,13 +1968,45 @@ static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg) s->base.is_jmp = DISAS_EOB_NEXT; } } else { - gen_op_movl_seg_T0_vm(s, seg_reg); + gen_op_movl_seg_real(s, seg_reg, src); if (seg_reg == R_SS) { s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; } } } +static void gen_far_call(DisasContext *s) +{ + TCGv_i32 new_cs = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(new_cs, s->T1); + if (PE(s) && !VM86(s)) { + gen_helper_lcall_protected(tcg_env, new_cs, s->T0, + tcg_constant_i32(s->dflag - 1), + eip_next_tl(s)); + } else { + TCGv_i32 new_eip = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(new_eip, s->T0); + gen_helper_lcall_real(tcg_env, new_cs, new_eip, + tcg_constant_i32(s->dflag - 1), + eip_next_i32(s)); + } + s->base.is_jmp = DISAS_JUMP; +} + +static void gen_far_jmp(DisasContext *s) +{ + if (PE(s) && !VM86(s)) { + TCGv_i32 new_cs = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(new_cs, s->T1); + gen_helper_ljmp_protected(tcg_env, new_cs, s->T0, + eip_next_tl(s)); + } else { + gen_op_movl_seg_real(s, R_CS, s->T1); + gen_op_jmp_v(s, s->T0); + } + s->base.is_jmp = DISAS_JUMP; +} + static void gen_svm_check_intercept(DisasContext *s, uint32_t type) { /* no SVM activated; fast case */ @@ -2571,24 +2021,27 @@ static inline void gen_stack_update(DisasContext *s, int addend) gen_op_add_reg_im(s, mo_stacksize(s), R_ESP, addend); } +static void gen_lea_ss_ofs(DisasContext *s, TCGv dest, TCGv src, target_ulong offset) +{ + if (offset) { + tcg_gen_addi_tl(dest, src, offset); + src = dest; + } + gen_lea_v_seg_dest(s, mo_stacksize(s), dest, src, R_SS, -1); +} + /* Generate a push. It depends on ss32, addseg and dflag. */ static void gen_push_v(DisasContext *s, TCGv val) { MemOp d_ot = mo_pushpop(s, s->dflag); MemOp a_ot = mo_stacksize(s); int size = 1 << d_ot; - TCGv new_esp = s->A0; + TCGv new_esp = tcg_temp_new(); - tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size); - - if (!CODE64(s)) { - if (ADDSEG(s)) { - new_esp = tcg_temp_new(); - tcg_gen_mov_tl(new_esp, s->A0); - } - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); - } + tcg_gen_subi_tl(new_esp, cpu_regs[R_ESP], size); + /* Now reduce the value to the address size and apply SS base. */ + gen_lea_ss_ofs(s, s->A0, new_esp, 0); gen_op_st_v(s, d_ot, val, s->A0); gen_op_mov_reg_v(s, a_ot, R_ESP, new_esp); } @@ -2598,7 +2051,7 @@ static MemOp gen_pop_T0(DisasContext *s) { MemOp d_ot = mo_pushpop(s, s->dflag); - gen_lea_v_seg_dest(s, mo_stacksize(s), s->T0, cpu_regs[R_ESP], R_SS, -1); + gen_lea_ss_ofs(s, s->T0, cpu_regs[R_ESP], 0); gen_op_ld_v(s, d_ot, s->T0, s->T0); return d_ot; @@ -2609,21 +2062,14 @@ static inline void gen_pop_update(DisasContext *s, MemOp ot) gen_stack_update(s, 1 << ot); } -static inline void gen_stack_A0(DisasContext *s) -{ - gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1); -} - static void gen_pusha(DisasContext *s) { - MemOp s_ot = SS32(s) ? MO_32 : MO_16; MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; for (i = 0; i < 8; i++) { - tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], (i - 8) * size); - gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], (i - 8) * size); gen_op_st_v(s, d_ot, cpu_regs[7 - i], s->A0); } @@ -2632,7 +2078,6 @@ static void gen_pusha(DisasContext *s) static void gen_popa(DisasContext *s) { - MemOp s_ot = SS32(s) ? 
MO_32 : MO_16; MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; @@ -2642,8 +2087,7 @@ static void gen_popa(DisasContext *s) if (7 - i == R_ESP) { continue; } - tcg_gen_addi_tl(s->A0, cpu_regs[R_ESP], i * size); - gen_lea_v_seg(s, s_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], i * size); gen_op_ld_v(s, d_ot, s->T0, s->A0); gen_op_mov_reg_v(s, d_ot, 7 - i, s->T0); } @@ -2654,12 +2098,12 @@ static void gen_popa(DisasContext *s) static void gen_enter(DisasContext *s, int esp_addend, int level) { MemOp d_ot = mo_pushpop(s, s->dflag); - MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16; + MemOp a_ot = mo_stacksize(s); int size = 1 << d_ot; /* Push BP; compute FrameTemp into T1. */ tcg_gen_subi_tl(s->T1, cpu_regs[R_ESP], size); - gen_lea_v_seg(s, a_ot, s->T1, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, s->T1, 0); gen_op_st_v(s, d_ot, cpu_regs[R_EBP], s->A0); level &= 31; @@ -2668,18 +2112,15 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) /* Copy level-1 pointers from the previous frame. */ for (i = 1; i < level; ++i) { - tcg_gen_subi_tl(s->A0, cpu_regs[R_EBP], size * i); - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], -size * i); gen_op_ld_v(s, d_ot, s->tmp0, s->A0); - tcg_gen_subi_tl(s->A0, s->T1, size * i); - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, s->T1, -size * i); gen_op_st_v(s, d_ot, s->tmp0, s->A0); } /* Push the current FrameTemp as the last level. */ - tcg_gen_subi_tl(s->A0, s->T1, size * level); - gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); + gen_lea_ss_ofs(s, s->A0, s->T1, -size * level); gen_op_st_v(s, d_ot, s->T1, s->A0); } @@ -2696,7 +2137,7 @@ static void gen_leave(DisasContext *s) MemOp d_ot = mo_pushpop(s, s->dflag); MemOp a_ot = mo_stacksize(s); - gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1); + gen_lea_ss_ofs(s, s->A0, cpu_regs[R_EBP], 0); gen_op_ld_v(s, d_ot, s->T0, s->A0); tcg_gen_addi_tl(s->T1, cpu_regs[R_EBP], 1 << d_ot); @@ -2719,7 +2160,7 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) fprintf(logfile, "ILLOPC: " TARGET_FMT_lx ":", pc); for (; pc < end; ++pc) { - fprintf(logfile, " %02x", cpu_ldub_code(env, pc)); + fprintf(logfile, " %02x", translator_ldub(env, &s->base, pc)); } fprintf(logfile, "\n"); qemu_log_unlock(logfile); @@ -2729,7 +2170,7 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) /* an interrupt is different from an exception because of the privilege checks */ -static void gen_interrupt(DisasContext *s, int intno) +static void gen_interrupt(DisasContext *s, uint8_t intno) { gen_update_cc_op(s); gen_update_eip_cur(s); @@ -2791,61 +2232,44 @@ static void gen_bnd_jmp(DisasContext *s) } } -/* Generate an end of block. Trace exception is also generated if needed. - If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. - If RECHECK_TF, emit a rechecking helper for #DB, ignoring the state of - S->TF. This is used by the syscall/sysret insns. */ +/* + * Generate an end of block, including common tasks such as generating + * single step traps, resetting the RF flag, and handling the interrupt + * shadow. + */ static void -do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) +gen_eob(DisasContext *s, int mode) { + bool inhibit_reset; + gen_update_cc_op(s); /* If several instructions disable interrupts, only the first does it. 
*/ - if (inhibit && !(s->flags & HF_INHIBIT_IRQ_MASK)) { - gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); - } else { + inhibit_reset = false; + if (s->flags & HF_INHIBIT_IRQ_MASK) { gen_reset_hflag(s, HF_INHIBIT_IRQ_MASK); + inhibit_reset = true; + } else if (mode == DISAS_EOB_INHIBIT_IRQ) { + gen_set_hflag(s, HF_INHIBIT_IRQ_MASK); } if (s->base.tb->flags & HF_RF_MASK) { gen_reset_eflags(s, RF_MASK); } - if (recheck_tf) { + if (mode == DISAS_EOB_RECHECK_TF) { gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if (s->flags & HF_TF_MASK) { + } else if ((s->flags & HF_TF_MASK) && mode != DISAS_EOB_INHIBIT_IRQ) { gen_helper_single_step(tcg_env); - } else if (jr) { + } else if (mode == DISAS_JUMP && + /* give irqs a chance to happen */ + !inhibit_reset) { tcg_gen_lookup_and_goto_ptr(); } else { tcg_gen_exit_tb(NULL, 0); } - s->base.is_jmp = DISAS_NORETURN; -} -static inline void -gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf) -{ - do_gen_eob_worker(s, inhibit, recheck_tf, false); -} - -/* End of block. - If INHIBIT, set HF_INHIBIT_IRQ_MASK if it isn't already set. */ -static void gen_eob_inhibit_irq(DisasContext *s, bool inhibit) -{ - gen_eob_worker(s, inhibit, false); -} - -/* End of block, resetting the inhibit irq flag. */ -static void gen_eob(DisasContext *s) -{ - gen_eob_worker(s, false, false); -} - -/* Jump to register */ -static void gen_jr(DisasContext *s) -{ - do_gen_eob_worker(s, false, false, true); + s->base.is_jmp = DISAS_NORETURN; } /* Jump to eip+diff, truncating the result to OT. */ @@ -2856,6 +2280,8 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) target_ulong new_pc = s->pc + diff; target_ulong new_eip = new_pc - s->cs_base; + assert(!s->cc_op_dirty); + /* In 64-bit mode, operand size is fixed at 64 bits. */ if (!CODE64(s)) { if (ot == MO_16) { @@ -2869,9 +2295,6 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) } new_eip &= mask; - gen_update_cc_op(s); - set_cc_op(s, CC_OP_DYNAMIC); - if (tb_cflags(s->base.tb) & CF_PCREL) { tcg_gen_addi_tl(cpu_eip, cpu_eip, new_pc - s->pc_save); /* @@ -2900,9 +2323,9 @@ static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) tcg_gen_movi_tl(cpu_eip, new_eip); } if (s->jmp_opt) { - gen_jr(s); /* jump to another page */ + gen_eob(s, DISAS_JUMP); /* jump to another page */ } else { - gen_eob(s); /* exit to main loop */ + gen_eob(s, DISAS_EOB_ONLY); /* exit to main loop */ } } } @@ -2978,10 +2401,6 @@ static void gen_sty_env_A0(DisasContext *s, int offset, bool align) tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop); } -#include "decode-new.h" -#include "emit.c.inc" -#include "decode-new.c.inc" - static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm) { TCGv_i64 cmp, val, old; @@ -3080,739 +2499,583 @@ static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm) } #endif -/* convert one instruction. s->base.is_jmp is set if the translation must - be stopped. 
Return the next pc value */ -static bool disas_insn(DisasContext *s, CPUState *cpu) +static bool disas_insn_x87(DisasContext *s, CPUState *cpu, int b) { CPUX86State *env = cpu_env(cpu); - int b, prefixes; - int shift; - MemOp ot, aflag, dflag; - int modrm, reg, rm, mod, op, opreg, val; - bool orig_cc_op_dirty = s->cc_op_dirty; - CCOp orig_cc_op = s->cc_op; - target_ulong orig_pc_save = s->pc_save; + bool update_fip = true; + int modrm, mod, rm, op; - s->pc = s->base.pc_next; - s->override = -1; -#ifdef TARGET_X86_64 - s->rex_r = 0; - s->rex_x = 0; - s->rex_b = 0; -#endif - s->rip_offset = 0; /* for relative ip address */ - s->vex_l = 0; - s->vex_v = 0; - s->vex_w = false; - switch (sigsetjmp(s->jmpbuf, 0)) { - case 0: - break; - case 1: - gen_exception_gpf(s); + if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { + /* if CR0.EM or CR0.TS are set, generate an FPU exception */ + /* XXX: what to do if illegal op ? */ + gen_exception(s, EXCP07_PREX); return true; - case 2: - /* Restore state that may affect the next instruction. */ - s->pc = s->base.pc_next; - /* - * TODO: These save/restore can be removed after the table-based - * decoder is complete; we will be decoding the insn completely - * before any code generation that might affect these variables. - */ - s->cc_op_dirty = orig_cc_op_dirty; - s->cc_op = orig_cc_op; - s->pc_save = orig_pc_save; - /* END TODO */ - s->base.num_insns--; - tcg_remove_ops_after(s->prev_insn_end); - s->base.insn_start = s->prev_insn_start; - s->base.is_jmp = DISAS_TOO_MANY; - return false; - default: - g_assert_not_reached(); } + modrm = x86_ldub_code(env, s); + mod = (modrm >> 6) & 3; + rm = modrm & 7; + op = ((b & 7) << 3) | ((modrm >> 3) & 7); + if (mod != 3) { + /* memory op */ + AddressParts a = gen_lea_modrm_0(env, s, modrm); + TCGv ea = gen_lea_modrm_1(s, a, false); + TCGv last_addr = tcg_temp_new(); + bool update_fdp = true; + + tcg_gen_mov_tl(last_addr, ea); + gen_lea_v_seg(s, ea, a.def_seg, s->override); + + switch (op) { + case 0x00 ... 0x07: /* fxxxs */ + case 0x10 ... 0x17: /* fixxxl */ + case 0x20 ... 0x27: /* fxxxl */ + case 0x30 ... 0x37: /* fixxx */ + { + int op1; + op1 = op & 7; + + switch (op >> 4) { + case 0: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_flds_FT0(tcg_env, s->tmp2_i32); + break; + case 1: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_fildl_FT0(tcg_env, s->tmp2_i32); + break; + case 2: + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fldl_FT0(tcg_env, s->tmp1_i64); + break; + case 3: + default: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LESW); + gen_helper_fildl_FT0(tcg_env, s->tmp2_i32); + break; + } - prefixes = 0; - - next_byte: - s->prefix = prefixes; - b = x86_ldub_code(env, s); - /* Collect prefixes. 
*/ - switch (b) { - default: - break; - case 0x0f: - b = x86_ldub_code(env, s) + 0x100; - break; - case 0xf3: - prefixes |= PREFIX_REPZ; - prefixes &= ~PREFIX_REPNZ; - goto next_byte; - case 0xf2: - prefixes |= PREFIX_REPNZ; - prefixes &= ~PREFIX_REPZ; - goto next_byte; - case 0xf0: - prefixes |= PREFIX_LOCK; - goto next_byte; - case 0x2e: - s->override = R_CS; - goto next_byte; - case 0x36: - s->override = R_SS; - goto next_byte; - case 0x3e: - s->override = R_DS; - goto next_byte; - case 0x26: - s->override = R_ES; - goto next_byte; - case 0x64: - s->override = R_FS; - goto next_byte; - case 0x65: - s->override = R_GS; - goto next_byte; - case 0x66: - prefixes |= PREFIX_DATA; - goto next_byte; - case 0x67: - prefixes |= PREFIX_ADR; - goto next_byte; -#ifdef TARGET_X86_64 - case 0x40 ... 0x4f: - if (CODE64(s)) { - /* REX prefix */ - prefixes |= PREFIX_REX; - s->vex_w = (b >> 3) & 1; - s->rex_r = (b & 0x4) << 1; - s->rex_x = (b & 0x2) << 2; - s->rex_b = (b & 0x1) << 3; - goto next_byte; - } - break; -#endif - case 0xc5: /* 2-byte VEX */ - case 0xc4: /* 3-byte VEX */ - if (CODE32(s) && !VM86(s)) { - int vex2 = x86_ldub_code(env, s); - s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ - - if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { - /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, - otherwise the instruction is LES or LDS. */ - break; + gen_helper_fp_arith_ST0_FT0(op1); + if (op1 == 3) { + /* fcomp needs pop */ + gen_helper_fpop(tcg_env); + } } - disas_insn_new(s, cpu, b); - return s->pc; - } - break; - } - - /* Post-process prefixes. */ - if (CODE64(s)) { - /* In 64-bit mode, the default data size is 32-bit. Select 64-bit - data with rex_w, and 16-bit data with 0x66; rex_w takes precedence - over 0x66 if both are present. */ - dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32); - /* In 64-bit mode, 0x67 selects 32-bit addressing. */ - aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64); - } else { - /* In 16/32-bit mode, 0x66 selects the opposite data size. */ - if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) { - dflag = MO_32; - } else { - dflag = MO_16; - } - /* In 16/32-bit mode, 0x67 selects the opposite addressing. */ - if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) { - aflag = MO_32; - } else { - aflag = MO_16; - } - } - - s->prefix = prefixes; - s->aflag = aflag; - s->dflag = dflag; - - /* now check op code */ - switch (b) { - /**************************/ - /* arith & logic */ - case 0x00 ... 0x05: - case 0x08 ... 0x0d: - case 0x10 ... 0x15: - case 0x18 ... 0x1d: - case 0x20 ... 0x25: - case 0x28 ... 0x2d: - case 0x30 ... 0x35: - case 0x38 ... 0x3d: - { - int f; - op = (b >> 3) & 7; - f = (b >> 1) & 3; - - ot = mo_b_d(b, dflag); - - switch(f) { - case 0: /* OP Ev, Gv */ - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - opreg = OR_TMP0; - } else if (op == OP_XORL && rm == reg) { - xor_zero: - /* xor reg, reg optimisation */ - set_cc_op(s, CC_OP_CLR); - tcg_gen_movi_tl(s->T0, 0); - gen_op_mov_reg_v(s, ot, reg, s->T0); + break; + case 0x08: /* flds */ + case 0x0a: /* fsts */ + case 0x0b: /* fstps */ + case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */ + case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */ + case 0x38 ... 
0x3b: /* filds, fisttps, fists, fistps */ + switch (op & 7) { + case 0: + switch (op >> 4) { + case 0: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_flds_ST0(tcg_env, s->tmp2_i32); + break; + case 1: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_fildl_ST0(tcg_env, s->tmp2_i32); + break; + case 2: + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fldl_ST0(tcg_env, s->tmp1_i64); + break; + case 3: + default: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LESW); + gen_helper_fildl_ST0(tcg_env, s->tmp2_i32); break; - } else { - opreg = rm; } - gen_op_mov_v_reg(s, ot, s->T1, reg); - gen_op(s, op, ot, opreg); break; - case 1: /* OP Gv, Ev */ - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - reg = ((modrm >> 3) & 7) | REX_R(s); - rm = (modrm & 7) | REX_B(s); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, s->T1, s->A0); - } else if (op == OP_XORL && rm == reg) { - goto xor_zero; - } else { - gen_op_mov_v_reg(s, ot, s->T1, rm); + case 1: + /* XXX: the corresponding CPUID bit must be tested ! */ + switch (op >> 4) { + case 1: + gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + break; + case 2: + gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + break; + case 3: + default: + gen_helper_fistt_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + break; } - gen_op(s, op, ot, reg); - break; - case 2: /* OP A, Iv */ - val = insn_get(env, s, ot); - tcg_gen_movi_tl(s->T1, val); - gen_op(s, op, ot, OR_EAX); + gen_helper_fpop(tcg_env); break; - } - } - break; - - case 0x82: - if (CODE64(s)) - goto illegal_op; - /* fall through */ - case 0x80: /* GRP1 */ - case 0x81: - case 0x83: - { - ot = mo_b_d(b, dflag); - - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - op = (modrm >> 3) & 7; - - if (mod != 3) { - if (b == 0x83) - s->rip_offset = 1; - else - s->rip_offset = insn_const_size(ot); - gen_lea_modrm(env, s, modrm); - opreg = OR_TMP0; - } else { - opreg = rm; - } - - switch(b) { default: - case 0x80: - case 0x81: - case 0x82: - val = insn_get(env, s, ot); - break; - case 0x83: - val = (int8_t)insn_get(env, s, MO_8); + switch (op >> 4) { + case 0: + gen_helper_fsts_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + break; + case 1: + gen_helper_fistl_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + break; + case 2: + gen_helper_fstl_ST0(s->tmp1_i64, tcg_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + break; + case 3: + default: + gen_helper_fist_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + break; + } + if ((op & 7) == 3) { + gen_helper_fpop(tcg_env); + } break; } - tcg_gen_movi_tl(s->T1, val); - gen_op(s, op, ot, opreg); + break; + case 0x0c: /* fldenv mem */ + gen_helper_fldenv(tcg_env, s->A0, + tcg_constant_i32(s->dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x0d: /* fldcw mem */ + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + gen_helper_fldcw(tcg_env, s->tmp2_i32); + update_fip = update_fdp = false; + break; + case 0x0e: /* fnstenv mem */ + gen_helper_fstenv(tcg_env, s->A0, + tcg_constant_i32(s->dflag - 1)); + update_fip = update_fdp = 
false; + break; + case 0x0f: /* fnstcw mem */ + gen_helper_fnstcw(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + update_fip = update_fdp = false; + break; + case 0x1d: /* fldt mem */ + gen_helper_fldt_ST0(tcg_env, s->A0); + break; + case 0x1f: /* fstpt mem */ + gen_helper_fstt_ST0(tcg_env, s->A0); + gen_helper_fpop(tcg_env); + break; + case 0x2c: /* frstor mem */ + gen_helper_frstor(tcg_env, s->A0, + tcg_constant_i32(s->dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x2e: /* fnsave mem */ + gen_helper_fsave(tcg_env, s->A0, + tcg_constant_i32(s->dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x2f: /* fnstsw mem */ + gen_helper_fnstsw(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + update_fip = update_fdp = false; + break; + case 0x3c: /* fbld */ + gen_helper_fbld_ST0(tcg_env, s->A0); + break; + case 0x3e: /* fbstp */ + gen_helper_fbst_ST0(tcg_env, s->A0); + gen_helper_fpop(tcg_env); + break; + case 0x3d: /* fildll */ + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fildll_ST0(tcg_env, s->tmp1_i64); + break; + case 0x3f: /* fistpll */ + gen_helper_fistll_ST0(s->tmp1_i64, tcg_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fpop(tcg_env); + break; + default: + return false; } - break; - /**************************/ - /* inc, dec, and other misc arith */ - case 0x40 ... 0x47: /* inc Gv */ - ot = dflag; - gen_inc(s, ot, OR_EAX + (b & 7), 1); - break; - case 0x48 ... 0x4f: /* dec Gv */ - ot = dflag; - gen_inc(s, ot, OR_EAX + (b & 7), -1); - break; - case 0xf6: /* GRP3 */ - case 0xf7: - ot = mo_b_d(b, dflag); + if (update_fdp) { + int last_seg = s->override >= 0 ? s->override : a.def_seg; - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - op = (modrm >> 3) & 7; - if (mod != 3) { - if (op == 0) { - s->rip_offset = insn_const_size(ot); - } - gen_lea_modrm(env, s, modrm); - /* For those below that handle locked memory, don't load here. 
*/ - if (!(s->prefix & PREFIX_LOCK) - || op != 2) { - gen_op_ld_v(s, ot, s->T0, s->A0); - } - } else { - gen_op_mov_v_reg(s, ot, s->T0, rm); + tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, + segs[last_seg].selector)); + tcg_gen_st16_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, fpds)); + tcg_gen_st_tl(last_addr, tcg_env, + offsetof(CPUX86State, fpdp)); } - - switch(op) { - case 0: /* test */ - val = insn_get(env, s, ot); - tcg_gen_movi_tl(s->T1, val); - gen_op_testl_T0_T1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); + } else { + /* register float ops */ + int opreg = rm; + + switch (op) { + case 0x08: /* fld sti */ + gen_helper_fpush(tcg_env); + gen_helper_fmov_ST0_STN(tcg_env, + tcg_constant_i32((opreg + 1) & 7)); break; - case 2: /* not */ - if (s->prefix & PREFIX_LOCK) { - if (mod == 3) { - goto illegal_op; - } - tcg_gen_movi_tl(s->T0, ~0); - tcg_gen_atomic_xor_fetch_tl(s->T0, s->A0, s->T0, - s->mem_index, ot | MO_LE); - } else { - tcg_gen_not_tl(s->T0, s->T0); - if (mod != 3) { - gen_op_st_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_reg_v(s, ot, rm, s->T0); - } - } + case 0x09: /* fxchg sti */ + case 0x29: /* fxchg4 sti, undocumented op */ + case 0x39: /* fxchg7 sti, undocumented op */ + gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg)); break; - case 3: /* neg */ - if (s->prefix & PREFIX_LOCK) { - TCGLabel *label1; - TCGv a0, t0, t1, t2; - - if (mod == 3) { - goto illegal_op; - } - a0 = s->A0; - t0 = s->T0; - label1 = gen_new_label(); - - gen_set_label(label1); - t1 = tcg_temp_new(); - t2 = tcg_temp_new(); - tcg_gen_mov_tl(t2, t0); - tcg_gen_neg_tl(t1, t0); - tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1, - s->mem_index, ot | MO_LE); - tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1); - - tcg_gen_neg_tl(s->T0, t0); - } else { - tcg_gen_neg_tl(s->T0, s->T0); - if (mod != 3) { - gen_op_st_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_reg_v(s, ot, rm, s->T0); - } + case 0x0a: /* grp d9/2 */ + switch (rm) { + case 0: /* fnop */ + /* + * check exceptions (FreeBSD FPU probe) + * needs to be treated as I/O because of ferr_irq + */ + translator_io_start(&s->base); + gen_helper_fwait(tcg_env); + update_fip = false; + break; + default: + return false; } - gen_op_update_neg_cc(s); - set_cc_op(s, CC_OP_SUBB + ot); break; - case 4: /* mul */ - switch(ot) { - case MO_8: - gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX); - tcg_gen_ext8u_tl(s->T0, s->T0); - tcg_gen_ext8u_tl(s->T1, s->T1); - /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(s->T0, s->T0, s->T1); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - tcg_gen_andi_tl(cpu_cc_src, s->T0, 0xff00); - set_cc_op(s, CC_OP_MULB); + case 0x0c: /* grp d9/4 */ + switch (rm) { + case 0: /* fchs */ + gen_helper_fchs_ST0(tcg_env); break; - case MO_16: - gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX); - tcg_gen_ext16u_tl(s->T0, s->T0); - tcg_gen_ext16u_tl(s->T1, s->T1); - /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(s->T0, s->T0, s->T1); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - tcg_gen_shri_tl(s->T0, s->T0, 16); - gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); - tcg_gen_mov_tl(cpu_cc_src, s->T0); - set_cc_op(s, CC_OP_MULW); + case 1: /* fabs */ + gen_helper_fabs_ST0(tcg_env); break; - default: - case MO_32: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]); - tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32); - 
tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32); - tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); - tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]); - set_cc_op(s, CC_OP_MULL); + case 4: /* ftst */ + gen_helper_fldz_FT0(tcg_env); + gen_helper_fcom_ST0_FT0(tcg_env); break; -#ifdef TARGET_X86_64 - case MO_64: - tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX], - s->T0, cpu_regs[R_EAX]); - tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); - tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]); - set_cc_op(s, CC_OP_MULQ); + case 5: /* fxam */ + gen_helper_fxam_ST0(tcg_env); break; -#endif + default: + return false; } break; - case 5: /* imul */ - switch(ot) { - case MO_8: - gen_op_mov_v_reg(s, MO_8, s->T1, R_EAX); - tcg_gen_ext8s_tl(s->T0, s->T0); - tcg_gen_ext8s_tl(s->T1, s->T1); - /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(s->T0, s->T0, s->T1); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - tcg_gen_ext8s_tl(s->tmp0, s->T0); - tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0); - set_cc_op(s, CC_OP_MULB); + case 0x0d: /* grp d9/5 */ + { + switch (rm) { + case 0: + gen_helper_fpush(tcg_env); + gen_helper_fld1_ST0(tcg_env); + break; + case 1: + gen_helper_fpush(tcg_env); + gen_helper_fldl2t_ST0(tcg_env); + break; + case 2: + gen_helper_fpush(tcg_env); + gen_helper_fldl2e_ST0(tcg_env); + break; + case 3: + gen_helper_fpush(tcg_env); + gen_helper_fldpi_ST0(tcg_env); + break; + case 4: + gen_helper_fpush(tcg_env); + gen_helper_fldlg2_ST0(tcg_env); + break; + case 5: + gen_helper_fpush(tcg_env); + gen_helper_fldln2_ST0(tcg_env); + break; + case 6: + gen_helper_fpush(tcg_env); + gen_helper_fldz_ST0(tcg_env); + break; + default: + return false; + } + } + break; + case 0x0e: /* grp d9/6 */ + switch (rm) { + case 0: /* f2xm1 */ + gen_helper_f2xm1(tcg_env); break; - case MO_16: - gen_op_mov_v_reg(s, MO_16, s->T1, R_EAX); - tcg_gen_ext16s_tl(s->T0, s->T0); - tcg_gen_ext16s_tl(s->T1, s->T1); - /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(s->T0, s->T0, s->T1); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - tcg_gen_ext16s_tl(s->tmp0, s->T0); - tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0); - tcg_gen_shri_tl(s->T0, s->T0, 16); - gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); - set_cc_op(s, CC_OP_MULW); + case 1: /* fyl2x */ + gen_helper_fyl2x(tcg_env); break; - default: - case MO_32: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EAX]); - tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[R_EAX], s->tmp2_i32); - tcg_gen_extu_i32_tl(cpu_regs[R_EDX], s->tmp3_i32); - tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31); - tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); - tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32); - set_cc_op(s, CC_OP_MULL); + case 2: /* fptan */ + gen_helper_fptan(tcg_env); break; -#ifdef TARGET_X86_64 - case MO_64: - tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX], - s->T0, cpu_regs[R_EAX]); - tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]); - tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63); - tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]); - set_cc_op(s, CC_OP_MULQ); + case 3: /* fpatan */ + gen_helper_fpatan(tcg_env); break; -#endif - } - break; - case 6: /* div */ - switch(ot) { - case MO_8: - gen_helper_divb_AL(tcg_env, s->T0); + case 4: /* fxtract */ + gen_helper_fxtract(tcg_env); break; - case MO_16: - gen_helper_divw_AX(tcg_env, s->T0); + case 5: /* 
fprem1 */ + gen_helper_fprem1(tcg_env); break; - default: - case MO_32: - gen_helper_divl_EAX(tcg_env, s->T0); + case 6: /* fdecstp */ + gen_helper_fdecstp(tcg_env); break; -#ifdef TARGET_X86_64 - case MO_64: - gen_helper_divq_EAX(tcg_env, s->T0); + default: + case 7: /* fincstp */ + gen_helper_fincstp(tcg_env); break; -#endif } break; - case 7: /* idiv */ - switch(ot) { - case MO_8: - gen_helper_idivb_AL(tcg_env, s->T0); + case 0x0f: /* grp d9/7 */ + switch (rm) { + case 0: /* fprem */ + gen_helper_fprem(tcg_env); break; - case MO_16: - gen_helper_idivw_AX(tcg_env, s->T0); + case 1: /* fyl2xp1 */ + gen_helper_fyl2xp1(tcg_env); break; - default: - case MO_32: - gen_helper_idivl_EAX(tcg_env, s->T0); + case 2: /* fsqrt */ + gen_helper_fsqrt(tcg_env); break; -#ifdef TARGET_X86_64 - case MO_64: - gen_helper_idivq_EAX(tcg_env, s->T0); + case 3: /* fsincos */ + gen_helper_fsincos(tcg_env); + break; + case 5: /* fscale */ + gen_helper_fscale(tcg_env); + break; + case 4: /* frndint */ + gen_helper_frndint(tcg_env); + break; + case 6: /* fsin */ + gen_helper_fsin(tcg_env); + break; + default: + case 7: /* fcos */ + gen_helper_fcos(tcg_env); break; -#endif } break; - default: - goto unknown_op; - } - break; - - case 0xfe: /* GRP4 */ - case 0xff: /* GRP5 */ - ot = mo_b_d(b, dflag); - - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - op = (modrm >> 3) & 7; - if (op >= 2 && b == 0xfe) { - goto unknown_op; - } - if (CODE64(s)) { - if (op == 2 || op == 4) { - /* operand size for jumps is 64 bit */ - ot = MO_64; - } else if (op == 3 || op == 5) { - ot = dflag != MO_16 ? MO_32 + REX_W(s) : MO_16; - } else if (op == 6) { - /* default push size is 64 bit */ - ot = mo_pushpop(s, dflag); + case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */ + case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */ + case 0x30: case 0x31: case 0x34 ... 
0x37: /* fxxxp sti, st */ + { + int op1; + + op1 = op & 7; + if (op >= 0x20) { + gen_helper_fp_arith_STN_ST0(op1, opreg); + if (op >= 0x30) { + gen_helper_fpop(tcg_env); + } + } else { + gen_helper_fmov_FT0_STN(tcg_env, + tcg_constant_i32(opreg)); + gen_helper_fp_arith_ST0_FT0(op1); + } } - } - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - if (op >= 2 && op != 3 && op != 5) - gen_op_ld_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, rm); - } - - switch(op) { - case 0: /* inc Ev */ - if (mod != 3) - opreg = OR_TMP0; - else - opreg = rm; - gen_inc(s, ot, opreg, 1); break; - case 1: /* dec Ev */ - if (mod != 3) - opreg = OR_TMP0; - else - opreg = rm; - gen_inc(s, ot, opreg, -1); + case 0x02: /* fcom */ + case 0x22: /* fcom2, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcom_ST0_FT0(tcg_env); break; - case 2: /* call Ev */ - /* XXX: optimize if memory (no 'and' is necessary) */ - if (dflag == MO_16) { - tcg_gen_ext16u_tl(s->T0, s->T0); - } - gen_push_v(s, eip_next_tl(s)); - gen_op_jmp_v(s, s->T0); - gen_bnd_jmp(s); - s->base.is_jmp = DISAS_JUMP; + case 0x03: /* fcomp */ + case 0x23: /* fcomp3, undocumented op */ + case 0x32: /* fcomp5, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); break; - case 3: /* lcall Ev */ - if (mod == 3) { - goto illegal_op; - } - gen_op_ld_v(s, ot, s->T1, s->A0); - gen_add_A0_im(s, 1 << ot); - gen_op_ld_v(s, MO_16, s->T0, s->A0); - do_lcall: - if (PE(s) && !VM86(s)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_lcall_protected(tcg_env, s->tmp2_i32, s->T1, - tcg_constant_i32(dflag - 1), - eip_next_tl(s)); - } else { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - gen_helper_lcall_real(tcg_env, s->tmp2_i32, s->tmp3_i32, - tcg_constant_i32(dflag - 1), - eip_next_i32(s)); + case 0x15: /* da/5 */ + switch (rm) { + case 1: /* fucompp */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1)); + gen_helper_fucom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + gen_helper_fpop(tcg_env); + break; + default: + return false; } - s->base.is_jmp = DISAS_JUMP; break; - case 4: /* jmp Ev */ - if (dflag == MO_16) { - tcg_gen_ext16u_tl(s->T0, s->T0); + case 0x1c: + switch (rm) { + case 0: /* feni (287 only, just do nop here) */ + break; + case 1: /* fdisi (287 only, just do nop here) */ + break; + case 2: /* fclex */ + gen_helper_fclex(tcg_env); + update_fip = false; + break; + case 3: /* fninit */ + gen_helper_fninit(tcg_env); + update_fip = false; + break; + case 4: /* fsetpm (287 only, just do nop here) */ + break; + default: + return false; } - gen_op_jmp_v(s, s->T0); - gen_bnd_jmp(s); - s->base.is_jmp = DISAS_JUMP; break; - case 5: /* ljmp Ev */ - if (mod == 3) { + case 0x1d: /* fucomi */ + if (!(s->cpuid_features & CPUID_CMOV)) { goto illegal_op; } - gen_op_ld_v(s, ot, s->T1, s->A0); - gen_add_A0_im(s, 1 << ot); - gen_op_ld_v(s, MO_16, s->T0, s->A0); - do_ljmp: - if (PE(s) && !VM86(s)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_ljmp_protected(tcg_env, s->tmp2_i32, s->T1, - eip_next_tl(s)); - } else { - gen_op_movl_seg_T0_vm(s, R_CS); - gen_op_jmp_v(s, s->T1); + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucomi_ST0_FT0(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); + break; + case 0x1e: /* fcomi */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; } - s->base.is_jmp = DISAS_JUMP; + 
gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcomi_ST0_FT0(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); break; - case 6: /* push Ev */ - gen_push_v(s, s->T0); + case 0x28: /* ffree sti */ + gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); break; - default: - goto unknown_op; - } - break; - - case 0x84: /* test Ev, Gv */ - case 0x85: - ot = mo_b_d(b, dflag); - - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_op_mov_v_reg(s, ot, s->T1, reg); - gen_op_testl_T0_T1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); - break; - - case 0xa8: /* test eAX, Iv */ - case 0xa9: - ot = mo_b_d(b, dflag); - val = insn_get(env, s, ot); - - gen_op_mov_v_reg(s, ot, s->T0, OR_EAX); - tcg_gen_movi_tl(s->T1, val); - gen_op_testl_T0_T1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); - break; - - case 0x98: /* CWDE/CBW */ - switch (dflag) { -#ifdef TARGET_X86_64 - case MO_64: - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); - tcg_gen_ext32s_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_64, R_EAX, s->T0); + case 0x2a: /* fst sti */ + gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg)); break; -#endif - case MO_32: - gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX); - tcg_gen_ext16s_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_32, R_EAX, s->T0); + case 0x2b: /* fstp sti */ + case 0x0b: /* fstp1 sti, undocumented op */ + case 0x3a: /* fstp8 sti, undocumented op */ + case 0x3b: /* fstp9 sti, undocumented op */ + gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fpop(tcg_env); break; - case MO_16: - gen_op_mov_v_reg(s, MO_8, s->T0, R_EAX); - tcg_gen_ext8s_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + case 0x2c: /* fucom st(i) */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucom_ST0_FT0(tcg_env); break; - default: - g_assert_not_reached(); - } - break; - case 0x99: /* CDQ/CWD */ - switch (dflag) { -#ifdef TARGET_X86_64 - case MO_64: - gen_op_mov_v_reg(s, MO_64, s->T0, R_EAX); - tcg_gen_sari_tl(s->T0, s->T0, 63); - gen_op_mov_reg_v(s, MO_64, R_EDX, s->T0); + case 0x2d: /* fucomp st(i) */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); break; -#endif - case MO_32: - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); - tcg_gen_ext32s_tl(s->T0, s->T0); - tcg_gen_sari_tl(s->T0, s->T0, 31); - gen_op_mov_reg_v(s, MO_32, R_EDX, s->T0); + case 0x33: /* de/3 */ + switch (rm) { + case 1: /* fcompp */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1)); + gen_helper_fcom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + gen_helper_fpop(tcg_env); + break; + default: + return false; + } break; - case MO_16: - gen_op_mov_v_reg(s, MO_16, s->T0, R_EAX); - tcg_gen_ext16s_tl(s->T0, s->T0); - tcg_gen_sari_tl(s->T0, s->T0, 15); - gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); + case 0x38: /* ffreep sti, undocumented op */ + gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fpop(tcg_env); break; - default: - g_assert_not_reached(); - } - break; - case 0x1af: /* imul Gv, Ev */ - case 0x69: /* imul Gv, Ev, I */ - case 0x6b: - ot = dflag; - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - if (b == 0x69) - s->rip_offset = insn_const_size(ot); - else if (b == 0x6b) - s->rip_offset = 1; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - if (b == 0x69) { - val = insn_get(env, s, ot); - tcg_gen_movi_tl(s->T1, val); - } else if (b == 0x6b) { - val = 
(int8_t)insn_get(env, s, MO_8); - tcg_gen_movi_tl(s->T1, val); - } else { - gen_op_mov_v_reg(s, ot, s->T1, reg); - } - switch (ot) { -#ifdef TARGET_X86_64 - case MO_64: - tcg_gen_muls2_i64(cpu_regs[reg], s->T1, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]); - tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63); - tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, s->T1); + case 0x3c: /* df/4 */ + switch (rm) { + case 0: + gen_helper_fnstsw(s->tmp2_i32, tcg_env); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + break; + default: + return false; + } break; -#endif - case MO_32: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - tcg_gen_muls2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); - tcg_gen_sari_i32(s->tmp2_i32, s->tmp2_i32, 31); - tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]); - tcg_gen_sub_i32(s->tmp2_i32, s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_cc_src, s->tmp2_i32); + case 0x3d: /* fucomip */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucomi_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); break; - default: - tcg_gen_ext16s_tl(s->T0, s->T0); - tcg_gen_ext16s_tl(s->T1, s->T1); - /* XXX: use 32 bit mul which could be faster */ - tcg_gen_mul_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - tcg_gen_ext16s_tl(s->tmp0, s->T0); - tcg_gen_sub_tl(cpu_cc_src, s->T0, s->tmp0); - gen_op_mov_reg_v(s, ot, reg, s->T0); + case 0x3e: /* fcomip */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcomi_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); + break; + case 0x10 ... 0x13: /* fcmovxx */ + case 0x18 ... 
0x1b: + { + int op1; + TCGLabel *l1; + static const uint8_t fcmov_cc[8] = { + (JCC_B << 1), + (JCC_Z << 1), + (JCC_BE << 1), + (JCC_P << 1), + }; + + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); + l1 = gen_new_label(); + gen_jcc1_noeob(s, op1, l1); + gen_helper_fmov_ST0_STN(tcg_env, + tcg_constant_i32(opreg)); + gen_set_label(l1); + } break; + default: + return false; } - set_cc_op(s, CC_OP_MULB + ot); - break; + } + + if (update_fip) { + tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, segs[R_CS].selector)); + tcg_gen_st16_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, fpcs)); + tcg_gen_st_tl(eip_cur_tl(s), + tcg_env, offsetof(CPUX86State, fpip)); + } + return true; + + illegal_op: + gen_illegal_opcode(s); + return true; +} + +static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) +{ + CPUX86State *env = cpu_env(cpu); + int prefixes = s->prefix; + MemOp dflag = s->dflag; + int shift; + MemOp ot; + int modrm, reg, rm, mod, op, opreg, val; + + /* now check op code */ + switch (b) { + /**************************/ + /* arith & logic */ case 0x1c0: case 0x1c1: /* xadd Ev, Gv */ ot = mo_b_d(b, dflag); @@ -3961,7 +3224,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_helper_rdrand(s->T0, tcg_env); rm = (modrm & 7) | REX_B(s); gen_op_mov_reg_v(s, dflag, rm, s->T0); - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; default: @@ -3970,375 +3233,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) break; /**************************/ - /* push/pop */ - case 0x50 ... 0x57: /* push */ - gen_op_mov_v_reg(s, MO_32, s->T0, (b & 7) | REX_B(s)); - gen_push_v(s, s->T0); - break; - case 0x58 ... 0x5f: /* pop */ - ot = gen_pop_T0(s); - /* NOTE: order is important for pop %sp */ - gen_pop_update(s, ot); - gen_op_mov_reg_v(s, ot, (b & 7) | REX_B(s), s->T0); - break; - case 0x60: /* pusha */ - if (CODE64(s)) - goto illegal_op; - gen_pusha(s); - break; - case 0x61: /* popa */ - if (CODE64(s)) - goto illegal_op; - gen_popa(s); - break; - case 0x68: /* push Iv */ - case 0x6a: - ot = mo_pushpop(s, dflag); - if (b == 0x68) - val = insn_get(env, s, ot); - else - val = (int8_t)insn_get(env, s, MO_8); - tcg_gen_movi_tl(s->T0, val); - gen_push_v(s, s->T0); - break; - case 0x8f: /* pop Ev */ - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - ot = gen_pop_T0(s); - if (mod == 3) { - /* NOTE: order is important for pop %sp */ - gen_pop_update(s, ot); - rm = (modrm & 7) | REX_B(s); - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - /* NOTE: order is important too for MMU exceptions */ - s->popl_esp_hack = 1 << ot; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); - s->popl_esp_hack = 0; - gen_pop_update(s, ot); - } - break; - case 0xc8: /* enter */ - { - int level; - val = x86_lduw_code(env, s); - level = x86_ldub_code(env, s); - gen_enter(s, val, level); - } - break; - case 0xc9: /* leave */ - gen_leave(s); - break; - case 0x06: /* push es */ - case 0x0e: /* push cs */ - case 0x16: /* push ss */ - case 0x1e: /* push ds */ - if (CODE64(s)) - goto illegal_op; - gen_op_movl_T0_seg(s, b >> 3); - gen_push_v(s, s->T0); - break; - case 0x1a0: /* push fs */ - case 0x1a8: /* push gs */ - gen_op_movl_T0_seg(s, (b >> 3) & 7); - gen_push_v(s, s->T0); - break; - case 0x07: /* pop es */ - case 0x17: /* pop ss */ - case 0x1f: /* pop ds */ - if (CODE64(s)) - goto illegal_op; - reg = b >> 3; - ot = gen_pop_T0(s); - gen_movl_seg_T0(s, reg); - gen_pop_update(s, ot); - break; - case 0x1a1: /* pop fs */ 
- case 0x1a9: /* pop gs */ - ot = gen_pop_T0(s); - gen_movl_seg_T0(s, (b >> 3) & 7); - gen_pop_update(s, ot); - break; - - /**************************/ - /* mov */ - case 0x88: - case 0x89: /* mov Gv, Ev */ - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - - /* generate a generic store */ - gen_ldst_modrm(env, s, modrm, ot, reg, 1); - break; - case 0xc6: - case 0xc7: /* mov Ev, Iv */ - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - if (mod != 3) { - s->rip_offset = insn_const_size(ot); - gen_lea_modrm(env, s, modrm); - } - val = insn_get(env, s, ot); - tcg_gen_movi_tl(s->T0, val); - if (mod != 3) { - gen_op_st_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_reg_v(s, ot, (modrm & 7) | REX_B(s), s->T0); - } - break; - case 0x8a: - case 0x8b: /* mov Ev, Gv */ - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - case 0x8e: /* mov seg, Gv */ - modrm = x86_ldub_code(env, s); - reg = (modrm >> 3) & 7; - if (reg >= 6 || reg == R_CS) - goto illegal_op; - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - gen_movl_seg_T0(s, reg); - break; - case 0x8c: /* mov Gv, seg */ - modrm = x86_ldub_code(env, s); - reg = (modrm >> 3) & 7; - mod = (modrm >> 6) & 3; - if (reg >= 6) - goto illegal_op; - gen_op_movl_T0_seg(s, reg); - ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); - break; - - case 0x1b6: /* movzbS Gv, Eb */ - case 0x1b7: /* movzwS Gv, Eb */ - case 0x1be: /* movsbS Gv, Eb */ - case 0x1bf: /* movswS Gv, Eb */ - { - MemOp d_ot; - MemOp s_ot; - - /* d_ot is the size of destination */ - d_ot = dflag; - /* ot is the size of source */ - ot = (b & 1) + MO_8; - /* s_ot is the sign+size of source */ - s_ot = b & 8 ? 
MO_SIGN | ot : ot; - - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - - if (mod == 3) { - if (s_ot == MO_SB && byte_reg_is_xH(s, rm)) { - tcg_gen_sextract_tl(s->T0, cpu_regs[rm - 4], 8, 8); - } else { - gen_op_mov_v_reg(s, ot, s->T0, rm); - switch (s_ot) { - case MO_UB: - tcg_gen_ext8u_tl(s->T0, s->T0); - break; - case MO_SB: - tcg_gen_ext8s_tl(s->T0, s->T0); - break; - case MO_UW: - tcg_gen_ext16u_tl(s->T0, s->T0); - break; - default: - case MO_SW: - tcg_gen_ext16s_tl(s->T0, s->T0); - break; - } - } - gen_op_mov_reg_v(s, d_ot, reg, s->T0); - } else { - gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, s_ot, s->T0, s->A0); - gen_op_mov_reg_v(s, d_ot, reg, s->T0); - } - } - break; - - case 0x8d: /* lea */ - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - if (mod == 3) - goto illegal_op; - reg = ((modrm >> 3) & 7) | REX_R(s); - { - AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a, false); - gen_lea_v_seg(s, s->aflag, ea, -1, -1); - gen_op_mov_reg_v(s, dflag, reg, s->A0); - } - break; - - case 0xa0: /* mov EAX, Ov */ - case 0xa1: - case 0xa2: /* mov Ov, EAX */ - case 0xa3: - { - target_ulong offset_addr; - - ot = mo_b_d(b, dflag); - offset_addr = insn_get_addr(env, s, s->aflag); - tcg_gen_movi_tl(s->A0, offset_addr); - gen_add_A0_ds_seg(s); - if ((b & 2) == 0) { - gen_op_ld_v(s, ot, s->T0, s->A0); - gen_op_mov_reg_v(s, ot, R_EAX, s->T0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, R_EAX); - gen_op_st_v(s, ot, s->T0, s->A0); - } - } - break; - case 0xd7: /* xlat */ - tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]); - tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]); - tcg_gen_add_tl(s->A0, s->A0, s->T0); - gen_add_A0_ds_seg(s); - gen_op_ld_v(s, MO_8, s->T0, s->A0); - gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); - break; - case 0xb0 ... 0xb7: /* mov R, Ib */ - val = insn_get(env, s, MO_8); - tcg_gen_movi_tl(s->T0, val); - gen_op_mov_reg_v(s, MO_8, (b & 7) | REX_B(s), s->T0); - break; - case 0xb8 ... 0xbf: /* mov R, Iv */ -#ifdef TARGET_X86_64 - if (dflag == MO_64) { - uint64_t tmp; - /* 64 bit case */ - tmp = x86_ldq_code(env, s); - reg = (b & 7) | REX_B(s); - tcg_gen_movi_tl(s->T0, tmp); - gen_op_mov_reg_v(s, MO_64, reg, s->T0); - } else -#endif - { - ot = dflag; - val = insn_get(env, s, ot); - reg = (b & 7) | REX_B(s); - tcg_gen_movi_tl(s->T0, val); - gen_op_mov_reg_v(s, ot, reg, s->T0); - } - break; - - case 0x91 ... 0x97: /* xchg R, EAX */ - do_xchg_reg_eax: - ot = dflag; - reg = (b & 7) | REX_B(s); - rm = R_EAX; - goto do_xchg_reg; - case 0x86: - case 0x87: /* xchg Ev, Gv */ - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - do_xchg_reg: - gen_op_mov_v_reg(s, ot, s->T0, reg); - gen_op_mov_v_reg(s, ot, s->T1, rm); - gen_op_mov_reg_v(s, ot, rm, s->T0); - gen_op_mov_reg_v(s, ot, reg, s->T1); - } else { - gen_lea_modrm(env, s, modrm); - gen_op_mov_v_reg(s, ot, s->T0, reg); - /* for xchg, lock is implicit */ - tcg_gen_atomic_xchg_tl(s->T1, s->A0, s->T0, - s->mem_index, ot | MO_LE); - gen_op_mov_reg_v(s, ot, reg, s->T1); - } - break; - case 0xc4: /* les Gv */ - /* In CODE64 this is VEX3; see above. */ - op = R_ES; - goto do_lxx; - case 0xc5: /* lds Gv */ - /* In CODE64 this is VEX2; see above. 
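Nearly every legacy case above splits its operands out of the ModRM byte with the same three shifts. A minimal standalone rendering of that split; the rex_r/rex_b flags are stand-ins for the REX_R()/REX_B() helpers, not the translator's real interface.

#include <stdio.h>

static void decode_modrm(unsigned char modrm, int rex_r, int rex_b)
{
    int mod = (modrm >> 6) & 3;             /* 3 = register operand, else memory */
    int reg = ((modrm >> 3) & 7) | (rex_r ? 8 : 0);
    int rm  = (modrm & 7) | (rex_b ? 8 : 0);

    printf("modrm=0x%02x -> mod=%d reg=%d rm=%d\n", modrm, mod, reg, rm);
}

int main(void)
{
    decode_modrm(0xc0, 0, 0);   /* register-direct form */
    decode_modrm(0x45, 1, 0);   /* [ebp+disp8] form, reg extended by REX.R */
    return 0;
}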
*/ - op = R_DS; - goto do_lxx; - case 0x1b2: /* lss Gv */ - op = R_SS; - goto do_lxx; - case 0x1b4: /* lfs Gv */ - op = R_FS; - goto do_lxx; - case 0x1b5: /* lgs Gv */ - op = R_GS; - do_lxx: - ot = dflag != MO_16 ? MO_32 : MO_16; - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - if (mod == 3) - goto illegal_op; - gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, s->T1, s->A0); - gen_add_A0_im(s, 1 << ot); - /* load the segment first to handle exceptions properly */ - gen_op_ld_v(s, MO_16, s->T0, s->A0); - gen_movl_seg_T0(s, op); - /* then put the data */ - gen_op_mov_reg_v(s, ot, reg, s->T1); - break; - - /************************/ /* shifts */ - case 0xc0: - case 0xc1: - /* shift Ev,Ib */ - shift = 2; - grp2: - { - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - op = (modrm >> 3) & 7; - - if (mod != 3) { - if (shift == 2) { - s->rip_offset = 1; - } - gen_lea_modrm(env, s, modrm); - opreg = OR_TMP0; - } else { - opreg = (modrm & 7) | REX_B(s); - } - - /* simpler op */ - if (shift == 0) { - gen_shift(s, op, ot, opreg, OR_ECX); - } else { - if (shift == 2) { - shift = x86_ldub_code(env, s); - } - gen_shifti(s, op, ot, opreg, shift); - } - } - break; - case 0xd0: - case 0xd1: - /* shift Ev,1 */ - shift = 1; - goto grp2; - case 0xd2: - case 0xd3: - /* shift Ev,cl */ - shift = 0; - goto grp2; - case 0x1a4: /* shld imm */ op = 0; shift = 1; @@ -4377,929 +3272,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) break; /************************/ - /* floats */ - case 0xd8 ... 0xdf: - { - bool update_fip = true; - - if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { - /* if CR0.EM or CR0.TS are set, generate an FPU exception */ - /* XXX: what to do if illegal op ? */ - gen_exception(s, EXCP07_PREX); - break; - } - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - rm = modrm & 7; - op = ((b & 7) << 3) | ((modrm >> 3) & 7); - if (mod != 3) { - /* memory op */ - AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a, false); - TCGv last_addr = tcg_temp_new(); - bool update_fdp = true; - - tcg_gen_mov_tl(last_addr, ea); - gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); - - switch (op) { - case 0x00 ... 0x07: /* fxxxs */ - case 0x10 ... 0x17: /* fixxxl */ - case 0x20 ... 0x27: /* fxxxl */ - case 0x30 ... 0x37: /* fixxx */ - { - int op1; - op1 = op & 7; - - switch (op >> 4) { - case 0: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_flds_FT0(tcg_env, s->tmp2_i32); - break; - case 1: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_fildl_FT0(tcg_env, s->tmp2_i32); - break; - case 2: - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEUQ); - gen_helper_fldl_FT0(tcg_env, s->tmp1_i64); - break; - case 3: - default: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LESW); - gen_helper_fildl_FT0(tcg_env, s->tmp2_i32); - break; - } - - gen_helper_fp_arith_ST0_FT0(op1); - if (op1 == 3) { - /* fcomp needs pop */ - gen_helper_fpop(tcg_env); - } - } - break; - case 0x08: /* flds */ - case 0x0a: /* fsts */ - case 0x0b: /* fstps */ - case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */ - case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */ - case 0x38 ... 
0x3b: /* filds, fisttps, fists, fistps */ - switch (op & 7) { - case 0: - switch (op >> 4) { - case 0: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_flds_ST0(tcg_env, s->tmp2_i32); - break; - case 1: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_fildl_ST0(tcg_env, s->tmp2_i32); - break; - case 2: - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEUQ); - gen_helper_fldl_ST0(tcg_env, s->tmp1_i64); - break; - case 3: - default: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LESW); - gen_helper_fildl_ST0(tcg_env, s->tmp2_i32); - break; - } - break; - case 1: - /* XXX: the corresponding CPUID bit must be tested ! */ - switch (op >> 4) { - case 1: - gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - break; - case 2: - gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEUQ); - break; - case 3: - default: - gen_helper_fistt_ST0(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - break; - } - gen_helper_fpop(tcg_env); - break; - default: - switch (op >> 4) { - case 0: - gen_helper_fsts_ST0(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - break; - case 1: - gen_helper_fistl_ST0(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - break; - case 2: - gen_helper_fstl_ST0(s->tmp1_i64, tcg_env); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEUQ); - break; - case 3: - default: - gen_helper_fist_ST0(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - break; - } - if ((op & 7) == 3) { - gen_helper_fpop(tcg_env); - } - break; - } - break; - case 0x0c: /* fldenv mem */ - gen_helper_fldenv(tcg_env, s->A0, - tcg_constant_i32(dflag - 1)); - update_fip = update_fdp = false; - break; - case 0x0d: /* fldcw mem */ - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - gen_helper_fldcw(tcg_env, s->tmp2_i32); - update_fip = update_fdp = false; - break; - case 0x0e: /* fnstenv mem */ - gen_helper_fstenv(tcg_env, s->A0, - tcg_constant_i32(dflag - 1)); - update_fip = update_fdp = false; - break; - case 0x0f: /* fnstcw mem */ - gen_helper_fnstcw(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - update_fip = update_fdp = false; - break; - case 0x1d: /* fldt mem */ - gen_helper_fldt_ST0(tcg_env, s->A0); - break; - case 0x1f: /* fstpt mem */ - gen_helper_fstt_ST0(tcg_env, s->A0); - gen_helper_fpop(tcg_env); - break; - case 0x2c: /* frstor mem */ - gen_helper_frstor(tcg_env, s->A0, - tcg_constant_i32(dflag - 1)); - update_fip = update_fdp = false; - break; - case 0x2e: /* fnsave mem */ - gen_helper_fsave(tcg_env, s->A0, - tcg_constant_i32(dflag - 1)); - update_fip = update_fdp = false; - break; - case 0x2f: /* fnstsw mem */ - gen_helper_fnstsw(s->tmp2_i32, tcg_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - update_fip = update_fdp = false; - break; - case 0x3c: /* fbld */ - gen_helper_fbld_ST0(tcg_env, s->A0); - break; - case 0x3e: /* fbstp */ - gen_helper_fbst_ST0(tcg_env, s->A0); - gen_helper_fpop(tcg_env); - break; - case 0x3d: /* fildll */ - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEUQ); - gen_helper_fildll_ST0(tcg_env, s->tmp1_i64); - break; - case 0x3f: /* fistpll */ - gen_helper_fistll_ST0(s->tmp1_i64, tcg_env); - 
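In both of the big x87 memory-form groups above, the switch on op >> 4 is what picks the operand type, because op packs the low three bits of the D8..DF opcode next to modrm.reg. A plain-C sketch of the resulting mapping:

#include <stdio.h>

/* A plain-C rendering of what the op >> 4 switches above select. */
int main(void)
{
    static const char *const kind[4] = {
        "32-bit float (d8/d9 memory forms)",
        "32-bit int   (da/db memory forms)",
        "64-bit float (dc/dd memory forms)",
        "16-bit int   (de/df memory forms)",
    };

    for (int op = 0x00; op < 0x40; op += 0x10) {
        printf("op 0x%02x..0x%02x -> %s\n", op, op + 0x0f, kind[op >> 4]);
    }
    return 0;
}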
tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEUQ); - gen_helper_fpop(tcg_env); - break; - default: - goto unknown_op; - } - - if (update_fdp) { - int last_seg = s->override >= 0 ? s->override : a.def_seg; - - tcg_gen_ld_i32(s->tmp2_i32, tcg_env, - offsetof(CPUX86State, - segs[last_seg].selector)); - tcg_gen_st16_i32(s->tmp2_i32, tcg_env, - offsetof(CPUX86State, fpds)); - tcg_gen_st_tl(last_addr, tcg_env, - offsetof(CPUX86State, fpdp)); - } - } else { - /* register float ops */ - opreg = rm; - - switch (op) { - case 0x08: /* fld sti */ - gen_helper_fpush(tcg_env); - gen_helper_fmov_ST0_STN(tcg_env, - tcg_constant_i32((opreg + 1) & 7)); - break; - case 0x09: /* fxchg sti */ - case 0x29: /* fxchg4 sti, undocumented op */ - case 0x39: /* fxchg7 sti, undocumented op */ - gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg)); - break; - case 0x0a: /* grp d9/2 */ - switch (rm) { - case 0: /* fnop */ - /* - * check exceptions (FreeBSD FPU probe) - * needs to be treated as I/O because of ferr_irq - */ - translator_io_start(&s->base); - gen_helper_fwait(tcg_env); - update_fip = false; - break; - default: - goto unknown_op; - } - break; - case 0x0c: /* grp d9/4 */ - switch (rm) { - case 0: /* fchs */ - gen_helper_fchs_ST0(tcg_env); - break; - case 1: /* fabs */ - gen_helper_fabs_ST0(tcg_env); - break; - case 4: /* ftst */ - gen_helper_fldz_FT0(tcg_env); - gen_helper_fcom_ST0_FT0(tcg_env); - break; - case 5: /* fxam */ - gen_helper_fxam_ST0(tcg_env); - break; - default: - goto unknown_op; - } - break; - case 0x0d: /* grp d9/5 */ - { - switch (rm) { - case 0: - gen_helper_fpush(tcg_env); - gen_helper_fld1_ST0(tcg_env); - break; - case 1: - gen_helper_fpush(tcg_env); - gen_helper_fldl2t_ST0(tcg_env); - break; - case 2: - gen_helper_fpush(tcg_env); - gen_helper_fldl2e_ST0(tcg_env); - break; - case 3: - gen_helper_fpush(tcg_env); - gen_helper_fldpi_ST0(tcg_env); - break; - case 4: - gen_helper_fpush(tcg_env); - gen_helper_fldlg2_ST0(tcg_env); - break; - case 5: - gen_helper_fpush(tcg_env); - gen_helper_fldln2_ST0(tcg_env); - break; - case 6: - gen_helper_fpush(tcg_env); - gen_helper_fldz_ST0(tcg_env); - break; - default: - goto unknown_op; - } - } - break; - case 0x0e: /* grp d9/6 */ - switch (rm) { - case 0: /* f2xm1 */ - gen_helper_f2xm1(tcg_env); - break; - case 1: /* fyl2x */ - gen_helper_fyl2x(tcg_env); - break; - case 2: /* fptan */ - gen_helper_fptan(tcg_env); - break; - case 3: /* fpatan */ - gen_helper_fpatan(tcg_env); - break; - case 4: /* fxtract */ - gen_helper_fxtract(tcg_env); - break; - case 5: /* fprem1 */ - gen_helper_fprem1(tcg_env); - break; - case 6: /* fdecstp */ - gen_helper_fdecstp(tcg_env); - break; - default: - case 7: /* fincstp */ - gen_helper_fincstp(tcg_env); - break; - } - break; - case 0x0f: /* grp d9/7 */ - switch (rm) { - case 0: /* fprem */ - gen_helper_fprem(tcg_env); - break; - case 1: /* fyl2xp1 */ - gen_helper_fyl2xp1(tcg_env); - break; - case 2: /* fsqrt */ - gen_helper_fsqrt(tcg_env); - break; - case 3: /* fsincos */ - gen_helper_fsincos(tcg_env); - break; - case 5: /* fscale */ - gen_helper_fscale(tcg_env); - break; - case 4: /* frndint */ - gen_helper_frndint(tcg_env); - break; - case 6: /* fsin */ - gen_helper_fsin(tcg_env); - break; - default: - case 7: /* fcos */ - gen_helper_fcos(tcg_env); - break; - } - break; - case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */ - case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */ - case 0x30: case 0x31: case 0x34 ... 
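The D9/5 group above pushes one of seven constants onto the FPU stack (fld1 through fldz). A small sketch of the values involved, shown in double precision purely for illustration since the helpers push 80-bit versions (build with -lm):

#include <stdio.h>
#include <math.h>

int main(void)
{
    const struct { const char *name; double v; } k[7] = {
        { "fld1",   1.0 },
        { "fldl2t", log2(10.0) },       /* log2(10) */
        { "fldl2e", log2(exp(1.0)) },   /* log2(e)  */
        { "fldpi",  acos(-1.0) },       /* pi       */
        { "fldlg2", log10(2.0) },       /* log10(2) */
        { "fldln2", log(2.0) },         /* ln(2)    */
        { "fldz",   0.0 },
    };

    for (int i = 0; i < 7; i++) {
        printf("%-7s %.17g\n", k[i].name, k[i].v);
    }
    return 0;
}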
0x37: /* fxxxp sti, st */ - { - int op1; - - op1 = op & 7; - if (op >= 0x20) { - gen_helper_fp_arith_STN_ST0(op1, opreg); - if (op >= 0x30) { - gen_helper_fpop(tcg_env); - } - } else { - gen_helper_fmov_FT0_STN(tcg_env, - tcg_constant_i32(opreg)); - gen_helper_fp_arith_ST0_FT0(op1); - } - } - break; - case 0x02: /* fcom */ - case 0x22: /* fcom2, undocumented op */ - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fcom_ST0_FT0(tcg_env); - break; - case 0x03: /* fcomp */ - case 0x23: /* fcomp3, undocumented op */ - case 0x32: /* fcomp5, undocumented op */ - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fcom_ST0_FT0(tcg_env); - gen_helper_fpop(tcg_env); - break; - case 0x15: /* da/5 */ - switch (rm) { - case 1: /* fucompp */ - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1)); - gen_helper_fucom_ST0_FT0(tcg_env); - gen_helper_fpop(tcg_env); - gen_helper_fpop(tcg_env); - break; - default: - goto unknown_op; - } - break; - case 0x1c: - switch (rm) { - case 0: /* feni (287 only, just do nop here) */ - break; - case 1: /* fdisi (287 only, just do nop here) */ - break; - case 2: /* fclex */ - gen_helper_fclex(tcg_env); - update_fip = false; - break; - case 3: /* fninit */ - gen_helper_fninit(tcg_env); - update_fip = false; - break; - case 4: /* fsetpm (287 only, just do nop here) */ - break; - default: - goto unknown_op; - } - break; - case 0x1d: /* fucomi */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fucomi_ST0_FT0(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x1e: /* fcomi */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fcomi_ST0_FT0(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x28: /* ffree sti */ - gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); - break; - case 0x2a: /* fst sti */ - gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg)); - break; - case 0x2b: /* fstp sti */ - case 0x0b: /* fstp1 sti, undocumented op */ - case 0x3a: /* fstp8 sti, undocumented op */ - case 0x3b: /* fstp9 sti, undocumented op */ - gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fpop(tcg_env); - break; - case 0x2c: /* fucom st(i) */ - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fucom_ST0_FT0(tcg_env); - break; - case 0x2d: /* fucomp st(i) */ - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fucom_ST0_FT0(tcg_env); - gen_helper_fpop(tcg_env); - break; - case 0x33: /* de/3 */ - switch (rm) { - case 1: /* fcompp */ - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1)); - gen_helper_fcom_ST0_FT0(tcg_env); - gen_helper_fpop(tcg_env); - gen_helper_fpop(tcg_env); - break; - default: - goto unknown_op; - } - break; - case 0x38: /* ffreep sti, undocumented op */ - gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fpop(tcg_env); - break; - case 0x3c: /* df/4 */ - switch (rm) { - case 0: - gen_helper_fnstsw(s->tmp2_i32, tcg_env); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); - break; - default: - goto unknown_op; - } - break; - case 0x3d: /* fucomip */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fucomi_ST0_FT0(tcg_env); - gen_helper_fpop(tcg_env); - 
set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x3e: /* fcomip */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); - gen_helper_fcomi_ST0_FT0(tcg_env); - gen_helper_fpop(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x10 ... 0x13: /* fcmovxx */ - case 0x18 ... 0x1b: - { - int op1; - TCGLabel *l1; - static const uint8_t fcmov_cc[8] = { - (JCC_B << 1), - (JCC_Z << 1), - (JCC_BE << 1), - (JCC_P << 1), - }; - - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); - l1 = gen_new_label(); - gen_jcc1_noeob(s, op1, l1); - gen_helper_fmov_ST0_STN(tcg_env, - tcg_constant_i32(opreg)); - gen_set_label(l1); - } - break; - default: - goto unknown_op; - } - } - - if (update_fip) { - tcg_gen_ld_i32(s->tmp2_i32, tcg_env, - offsetof(CPUX86State, segs[R_CS].selector)); - tcg_gen_st16_i32(s->tmp2_i32, tcg_env, - offsetof(CPUX86State, fpcs)); - tcg_gen_st_tl(eip_cur_tl(s), - tcg_env, offsetof(CPUX86State, fpip)); - } - } - break; - /************************/ - /* string ops */ - - case 0xa4: /* movsS */ - case 0xa5: - ot = mo_b_d(b, dflag); - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_movs(s, ot); - } else { - gen_movs(s, ot); - } - break; - - case 0xaa: /* stosS */ - case 0xab: - ot = mo_b_d(b, dflag); - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_stos(s, ot); - } else { - gen_stos(s, ot); - } - break; - case 0xac: /* lodsS */ - case 0xad: - ot = mo_b_d(b, dflag); - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_lods(s, ot); - } else { - gen_lods(s, ot); - } - break; - case 0xae: /* scasS */ - case 0xaf: - ot = mo_b_d(b, dflag); - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); - if (prefixes & PREFIX_REPNZ) { - gen_repz_scas(s, ot, 1); - } else if (prefixes & PREFIX_REPZ) { - gen_repz_scas(s, ot, 0); - } else { - gen_scas(s, ot); - } - break; - - case 0xa6: /* cmpsS */ - case 0xa7: - ot = mo_b_d(b, dflag); - if (prefixes & PREFIX_REPNZ) { - gen_repz_cmps(s, ot, 1); - } else if (prefixes & PREFIX_REPZ) { - gen_repz_cmps(s, ot, 0); - } else { - gen_cmps(s, ot); - } - break; - case 0x6c: /* insS */ - case 0x6d: - ot = mo_b_d32(b, dflag); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); - if (!gen_check_io(s, ot, s->tmp2_i32, - SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) { - break; - } - translator_io_start(&s->base); - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_ins(s, ot); - } else { - gen_ins(s, ot); - } - break; - case 0x6e: /* outsS */ - case 0x6f: - ot = mo_b_d32(b, dflag); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); - if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) { - break; - } - translator_io_start(&s->base); - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_outs(s, ot); - } else { - gen_outs(s, ot); - } - break; - - /************************/ - /* port I/O */ - - case 0xe4: - case 0xe5: - ot = mo_b_d32(b, dflag); - val = x86_ldub_code(env, s); - tcg_gen_movi_i32(s->tmp2_i32, val); - if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) { - break; - } - translator_io_start(&s->base); - gen_helper_in_func(ot, s->T1, s->tmp2_i32); - gen_op_mov_reg_v(s, ot, R_EAX, s->T1); - gen_bpt_io(s, s->tmp2_i32, ot); - break; - case 0xe6: - case 0xe7: - ot = mo_b_d32(b, dflag); - val = x86_ldub_code(env, s); - 
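For the string instructions removed above, MOVS/STOS/LODS treat a REP prefix purely as a repeat count, while SCAS/CMPS also terminate early on ZF, which is why only they distinguish REPZ from REPNZ. A toy model of those semantics, with no claim to match the generated TCG code:

#include <stdio.h>

static size_t rep_movsb(unsigned char *dst, const unsigned char *src, size_t ecx)
{
    while (ecx) {
        *dst++ = *src++;
        ecx--;
    }
    return ecx;                     /* leftover count, always 0 here */
}

static size_t repe_cmpsb(const unsigned char *a, const unsigned char *b,
                         size_t ecx, int *zf)
{
    while (ecx) {
        *zf = (*a == *b);
        a++;
        b++;
        ecx--;
        if (!*zf) {                 /* REPE stops on ZF == 0; REPNE would stop on ZF == 1 */
            break;
        }
    }
    return ecx;
}

int main(void)
{
    unsigned char dst[8];
    int zf = 1;
    size_t left;

    left = rep_movsb(dst, (const unsigned char *)"abcdefgh", 8);
    printf("movsb leftover ecx=%zu first byte=%c\n", left, dst[0]);

    left = repe_cmpsb((const unsigned char *)"abcx",
                      (const unsigned char *)"abcd", 4, &zf);
    printf("cmpsb leftover ecx=%zu zf=%d\n", left, zf);
    return 0;
}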
tcg_gen_movi_i32(s->tmp2_i32, val); - if (!gen_check_io(s, ot, s->tmp2_i32, 0)) { - break; - } - translator_io_start(&s->base); - gen_op_mov_v_reg(s, ot, s->T1, R_EAX); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); - gen_bpt_io(s, s->tmp2_i32, ot); - break; - case 0xec: - case 0xed: - ot = mo_b_d32(b, dflag); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); - if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) { - break; - } - translator_io_start(&s->base); - gen_helper_in_func(ot, s->T1, s->tmp2_i32); - gen_op_mov_reg_v(s, ot, R_EAX, s->T1); - gen_bpt_io(s, s->tmp2_i32, ot); - break; - case 0xee: - case 0xef: - ot = mo_b_d32(b, dflag); - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); - tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); - if (!gen_check_io(s, ot, s->tmp2_i32, 0)) { - break; - } - translator_io_start(&s->base); - gen_op_mov_v_reg(s, ot, s->T1, R_EAX); - tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); - gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); - gen_bpt_io(s, s->tmp2_i32, ot); - break; - - /************************/ - /* control */ - case 0xc2: /* ret im */ - val = x86_ldsw_code(env, s); - ot = gen_pop_T0(s); - gen_stack_update(s, val + (1 << ot)); - /* Note that gen_pop_T0 uses a zero-extending load. */ - gen_op_jmp_v(s, s->T0); - gen_bnd_jmp(s); - s->base.is_jmp = DISAS_JUMP; - break; - case 0xc3: /* ret */ - ot = gen_pop_T0(s); - gen_pop_update(s, ot); - /* Note that gen_pop_T0 uses a zero-extending load. */ - gen_op_jmp_v(s, s->T0); - gen_bnd_jmp(s); - s->base.is_jmp = DISAS_JUMP; - break; - case 0xca: /* lret im */ - val = x86_ldsw_code(env, s); - do_lret: - if (PE(s) && !VM86(s)) { - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_lret_protected(tcg_env, tcg_constant_i32(dflag - 1), - tcg_constant_i32(val)); - } else { - gen_stack_A0(s); - /* pop offset */ - gen_op_ld_v(s, dflag, s->T0, s->A0); - /* NOTE: keeping EIP updated is not a problem in case of - exception */ - gen_op_jmp_v(s, s->T0); - /* pop selector */ - gen_add_A0_im(s, 1 << dflag); - gen_op_ld_v(s, dflag, s->T0, s->A0); - gen_op_movl_seg_T0_vm(s, R_CS); - /* add stack offset */ - gen_stack_update(s, val + (2 << dflag)); - } - s->base.is_jmp = DISAS_EOB_ONLY; - break; - case 0xcb: /* lret */ - val = 0; - goto do_lret; - case 0xcf: /* iret */ - gen_svm_check_intercept(s, SVM_EXIT_IRET); - if (!PE(s) || VM86(s)) { - /* real mode or vm86 mode */ - if (!check_vm86_iopl(s)) { - break; - } - gen_helper_iret_real(tcg_env, tcg_constant_i32(dflag - 1)); - } else { - gen_helper_iret_protected(tcg_env, tcg_constant_i32(dflag - 1), - eip_next_i32(s)); - } - set_cc_op(s, CC_OP_EFLAGS); - s->base.is_jmp = DISAS_EOB_ONLY; - break; - case 0xe8: /* call im */ - { - int diff = (dflag != MO_16 - ? (int32_t)insn_get(env, s, MO_32) - : (int16_t)insn_get(env, s, MO_16)); - gen_push_v(s, eip_next_tl(s)); - gen_bnd_jmp(s); - gen_jmp_rel(s, dflag, diff, 0); - } - break; - case 0x9a: /* lcall im */ - { - unsigned int selector, offset; - - if (CODE64(s)) - goto illegal_op; - ot = dflag; - offset = insn_get(env, s, ot); - selector = insn_get(env, s, MO_16); - - tcg_gen_movi_tl(s->T0, selector); - tcg_gen_movi_tl(s->T1, offset); - } - goto do_lcall; - case 0xe9: /* jmp im */ - { - int diff = (dflag != MO_16 - ? 
(int32_t)insn_get(env, s, MO_32) - : (int16_t)insn_get(env, s, MO_16)); - gen_bnd_jmp(s); - gen_jmp_rel(s, dflag, diff, 0); - } - break; - case 0xea: /* ljmp im */ - { - unsigned int selector, offset; - - if (CODE64(s)) - goto illegal_op; - ot = dflag; - offset = insn_get(env, s, ot); - selector = insn_get(env, s, MO_16); - - tcg_gen_movi_tl(s->T0, selector); - tcg_gen_movi_tl(s->T1, offset); - } - goto do_ljmp; - case 0xeb: /* jmp Jb */ - { - int diff = (int8_t)insn_get(env, s, MO_8); - gen_jmp_rel(s, dflag, diff, 0); - } - break; - case 0x70 ... 0x7f: /* jcc Jb */ - { - int diff = (int8_t)insn_get(env, s, MO_8); - gen_bnd_jmp(s); - gen_jcc(s, b, diff); - } - break; - case 0x180 ... 0x18f: /* jcc Jv */ - { - int diff = (dflag != MO_16 - ? (int32_t)insn_get(env, s, MO_32) - : (int16_t)insn_get(env, s, MO_16)); - gen_bnd_jmp(s); - gen_jcc(s, b, diff); - } - break; - - case 0x190 ... 0x19f: /* setcc Gv */ - modrm = x86_ldub_code(env, s); - gen_setcc1(s, b, s->T0); - gen_ldst_modrm(env, s, modrm, MO_8, OR_TMP0, 1); - break; - case 0x140 ... 0x14f: /* cmov Gv, Ev */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - ot = dflag; - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_cmovcc1(s, b ^ 1, s->T0, cpu_regs[reg]); - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - - /************************/ - /* flags */ - case 0x9c: /* pushf */ - gen_svm_check_intercept(s, SVM_EXIT_PUSHF); - if (check_vm86_iopl(s)) { - gen_update_cc_op(s); - gen_helper_read_eflags(s->T0, tcg_env); - gen_push_v(s, s->T0); - } - break; - case 0x9d: /* popf */ - gen_svm_check_intercept(s, SVM_EXIT_POPF); - if (check_vm86_iopl(s)) { - int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK; - - if (CPL(s) == 0) { - mask |= IF_MASK | IOPL_MASK; - } else if (CPL(s) <= IOPL(s)) { - mask |= IF_MASK; - } - if (dflag == MO_16) { - mask &= 0xffff; - } - - ot = gen_pop_T0(s); - gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask)); - gen_pop_update(s, ot); - set_cc_op(s, CC_OP_EFLAGS); - /* abort translation because TF/AC flag may change */ - s->base.is_jmp = DISAS_EOB_NEXT; - } - break; - case 0x9e: /* sahf */ - if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) - goto illegal_op; - tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8); - gen_compute_eflags(s); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); - tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0); - break; - case 0x9f: /* lahf */ - if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) - goto illegal_op; - gen_compute_eflags(s); - /* Note: gen_compute_eflags() only gives the condition codes */ - tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02); - tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8); - break; - case 0xf5: /* cmc */ - gen_compute_eflags(s); - tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C); - break; - case 0xf8: /* clc */ - gen_compute_eflags(s); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C); - break; - case 0xf9: /* stc */ - gen_compute_eflags(s); - tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C); - break; - case 0xfc: /* cld */ - tcg_gen_movi_i32(s->tmp2_i32, 1); - tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df)); - break; - case 0xfd: /* std */ - tcg_gen_movi_i32(s->tmp2_i32, -1); - tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df)); - break; - - /************************/ /* bit operations */ case 0x1ba: /* bt/bts/btr/btc Gv, im */ ot = dflag; @@ 
-5348,7 +3320,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot); tcg_gen_shli_tl(s->tmp0, s->tmp0, ot); tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0); - gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->A0, a.def_seg, s->override); if (!(s->prefix & PREFIX_LOCK)) { gen_op_ld_v(s, ot, s->T0, s->A0); } @@ -5443,7 +3415,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) ot = dflag; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_ld_modrm(env, s, modrm, ot); gen_extu(ot, s->T0); /* Note that lzcnt and tzcnt are in different extensions. */ @@ -5488,188 +3460,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } gen_op_mov_reg_v(s, ot, reg, s->T0); break; - /************************/ - /* bcd */ - case 0x27: /* daa */ - if (CODE64(s)) - goto illegal_op; - gen_update_cc_op(s); - gen_helper_daa(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x2f: /* das */ - if (CODE64(s)) - goto illegal_op; - gen_update_cc_op(s); - gen_helper_das(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x37: /* aaa */ - if (CODE64(s)) - goto illegal_op; - gen_update_cc_op(s); - gen_helper_aaa(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x3f: /* aas */ - if (CODE64(s)) - goto illegal_op; - gen_update_cc_op(s); - gen_helper_aas(tcg_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0xd4: /* aam */ - if (CODE64(s)) - goto illegal_op; - val = x86_ldub_code(env, s); - if (val == 0) { - gen_exception(s, EXCP00_DIVZ); - } else { - gen_helper_aam(tcg_env, tcg_constant_i32(val)); - set_cc_op(s, CC_OP_LOGICB); - } - break; - case 0xd5: /* aad */ - if (CODE64(s)) - goto illegal_op; - val = x86_ldub_code(env, s); - gen_helper_aad(tcg_env, tcg_constant_i32(val)); - set_cc_op(s, CC_OP_LOGICB); - break; - /************************/ - /* misc */ - case 0x90: /* nop */ - /* XXX: correct lock test for all insn */ - if (prefixes & PREFIX_LOCK) { - goto illegal_op; - } - /* If REX_B is set, then this is xchg eax, r8d, not a nop. 
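The BCD adjustments removed above (0xd4/0xd5) defer to gen_helper_aam and gen_helper_aad; a plain-C sketch of the arithmetic they perform, and of why a zero AAM immediate has to raise #DE (EXCP00_DIVZ) before the helper runs:

#include <stdio.h>
#include <stdint.h>

static void aam(uint8_t *al, uint8_t *ah, uint8_t base)
{
    *ah = *al / base;               /* base == 0 would divide by zero: #DE */
    *al = *al % base;
}

static void aad(uint8_t *al, uint8_t *ah, uint8_t base)
{
    *al = (uint8_t)(*al + *ah * base);
    *ah = 0;
}

int main(void)
{
    uint8_t al = 57, ah = 0;

    aam(&al, &ah, 10);              /* 57 -> AH=5, AL=7 */
    printf("aam: ah=%d al=%d\n", ah, al);
    aad(&al, &ah, 10);              /* 7 + 5*10 -> AL=57, AH=0 */
    printf("aad: ah=%d al=%d\n", ah, al);
    return 0;
}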
*/ - if (REX_B(s)) { - goto do_xchg_reg_eax; - } - if (prefixes & PREFIX_REPZ) { - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_pause(tcg_env, cur_insn_len_i32(s)); - s->base.is_jmp = DISAS_NORETURN; - } - break; - case 0x9b: /* fwait */ - if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == - (HF_MP_MASK | HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX); - } else { - /* needs to be treated as I/O because of ferr_irq */ - translator_io_start(&s->base); - gen_helper_fwait(tcg_env); - } - break; - case 0xcc: /* int3 */ - gen_interrupt(s, EXCP03_INT3); - break; - case 0xcd: /* int N */ - val = x86_ldub_code(env, s); - if (check_vm86_iopl(s)) { - gen_interrupt(s, val); - } - break; - case 0xce: /* into */ - if (CODE64(s)) - goto illegal_op; - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_into(tcg_env, cur_insn_len_i32(s)); - break; -#ifdef WANT_ICEBP - case 0xf1: /* icebp (undocumented, exits to external debugger) */ - gen_svm_check_intercept(s, SVM_EXIT_ICEBP); - gen_debug(s); - break; -#endif - case 0xfa: /* cli */ - if (check_iopl(s)) { - gen_reset_eflags(s, IF_MASK); - } - break; - case 0xfb: /* sti */ - if (check_iopl(s)) { - gen_set_eflags(s, IF_MASK); - /* interruptions are enabled only the first insn after sti */ - gen_update_eip_next(s); - gen_eob_inhibit_irq(s, true); - } - break; - case 0x62: /* bound */ - if (CODE64(s)) - goto illegal_op; - ot = dflag; - modrm = x86_ldub_code(env, s); - reg = (modrm >> 3) & 7; - mod = (modrm >> 6) & 3; - if (mod == 3) - goto illegal_op; - gen_op_mov_v_reg(s, ot, s->T0, reg); - gen_lea_modrm(env, s, modrm); - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - if (ot == MO_16) { - gen_helper_boundw(tcg_env, s->A0, s->tmp2_i32); - } else { - gen_helper_boundl(tcg_env, s->A0, s->tmp2_i32); - } - break; - case 0x1c8 ... 0x1cf: /* bswap reg */ - reg = (b & 7) | REX_B(s); -#ifdef TARGET_X86_64 - if (dflag == MO_64) { - tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]); - break; - } -#endif - tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ); - break; - case 0xd6: /* salc */ - if (CODE64(s)) - goto illegal_op; - gen_compute_eflags_c(s, s->T0); - tcg_gen_neg_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); - break; - case 0xe0: /* loopnz */ - case 0xe1: /* loopz */ - case 0xe2: /* loop */ - case 0xe3: /* jecxz */ - { - TCGLabel *l1, *l2; - int diff = (int8_t)insn_get(env, s, MO_8); - - l1 = gen_new_label(); - l2 = gen_new_label(); - gen_update_cc_op(s); - b &= 3; - switch(b) { - case 0: /* loopnz */ - case 1: /* loopz */ - gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_op_jz_ecx(s, l2); - gen_jcc1(s, (JCC_Z << 1) | (b ^ 1), l1); - break; - case 2: /* loop */ - gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_op_jnz_ecx(s, l1); - break; - default: - case 3: /* jcxz */ - gen_op_jz_ecx(s, l1); - break; - } - - gen_set_label(l2); - gen_jmp_rel_csize(s, 0, 1); - - gen_set_label(l1); - gen_jmp_rel(s, dflag, diff, 0); - } - break; case 0x130: /* wrmsr */ case 0x132: /* rdmsr */ if (check_cpl0(s)) { @@ -5727,10 +3517,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_update_cc_op(s); gen_update_eip_cur(s); gen_helper_syscall(tcg_env, cur_insn_len_i32(s)); + /* condition codes are modified only in long mode */ + if (LMA(s)) { + assume_cc_op(s, CC_OP_EFLAGS); + } /* TF handling for the syscall insn is different. The TF bit is checked after the syscall insn completes. This allows #DB to not be generated after one has entered CPL0 if TF is set in FMASK. 
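The LOOP/LOOPZ/LOOPNZ/JECXZ cases removed above reduce to one branch decision on ECX and ZF. A behavioural sketch, using a 32-bit counter for simplicity where the real instructions follow the address size:

#include <stdio.h>
#include <stdint.h>

static int loop_taken(int b, uint32_t *ecx, int zf)
{
    switch (b & 3) {
    case 0:                          /* loopnz */
        return --*ecx != 0 && !zf;
    case 1:                          /* loopz */
        return --*ecx != 0 && zf;
    case 2:                          /* loop */
        return --*ecx != 0;
    default:                         /* jecxz: tests ECX without decrementing */
        return *ecx == 0;
    }
}

int main(void)
{
    uint32_t ecx = 2;
    int t;

    t = loop_taken(2, &ecx, 0);      /* ecx 2 -> 1: taken */
    printf("loop:  taken=%d ecx=%u\n", t, ecx);
    t = loop_taken(2, &ecx, 0);      /* ecx 1 -> 0: not taken */
    printf("loop:  taken=%d ecx=%u\n", t, ecx);
    t = loop_taken(3, &ecx, 0);      /* ecx == 0: taken */
    printf("jecxz: taken=%d\n", t);
    return 0;
}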
*/ - gen_eob_worker(s, false, true); + s->base.is_jmp = DISAS_EOB_RECHECK_TF; break; case 0x107: /* sysret */ /* For Intel SYSRET is only valid in long mode */ @@ -5743,13 +3537,13 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_helper_sysret(tcg_env, tcg_constant_i32(dflag - 1)); /* condition codes are modified only in long mode */ if (LMA(s)) { - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); } /* TF handling for the sysret insn is different. The TF bit is checked after the sysret insn completes. This allows #DB to be generated "as if" the syscall insn in userspace has just completed. */ - gen_eob_worker(s, false, true); + s->base.is_jmp = DISAS_EOB_RECHECK_TF; } break; case 0x1a2: /* cpuid */ @@ -5757,14 +3551,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) gen_update_eip_cur(s); gen_helper_cpuid(tcg_env); break; - case 0xf4: /* hlt */ - if (check_cpl0(s)) { - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_hlt(tcg_env, cur_insn_len_i32(s)); - s->base.is_jmp = DISAS_NORETURN; - } - break; case 0x100: modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; @@ -5780,14 +3566,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, ldt.selector)); ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + gen_st_modrm(env, s, modrm, ot); break; case 2: /* lldt */ if (!PE(s) || VM86(s)) goto illegal_op; if (check_cpl0(s)) { gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ld_modrm(env, s, modrm, MO_16); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_lldt(tcg_env, s->tmp2_i32); } @@ -5802,14 +3588,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, tr.selector)); ot = mod == 3 ? dflag : MO_16; - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + gen_st_modrm(env, s, modrm, ot); break; case 3: /* ltr */ if (!PE(s) || VM86(s)) goto illegal_op; if (check_cpl0(s)) { gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ld_modrm(env, s, modrm, MO_16); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); gen_helper_ltr(tcg_env, s->tmp2_i32); } @@ -5818,14 +3604,14 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) case 5: /* verw */ if (!PE(s) || VM86(s)) goto illegal_op; - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ld_modrm(env, s, modrm, MO_16); gen_update_cc_op(s); if (op == 4) { gen_helper_verr(tcg_env, s->T0); } else { gen_helper_verw(tcg_env, s->T0); } - set_cc_op(s, CC_OP_EFLAGS); + assume_cc_op(s, CC_OP_EFLAGS); break; default: goto unknown_op; @@ -5859,8 +3645,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } gen_update_cc_op(s); gen_update_eip_cur(s); - tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); - gen_add_A0_ds_seg(s); + gen_lea_v_seg(s, cpu_regs[R_EAX], R_DS, s->override); gen_helper_monitor(tcg_env, s->A0); break; @@ -6082,10 +3867,11 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) */ mod = (modrm >> 6) & 3; ot = (mod != 3 ? 
MO_16 : s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); + gen_st_modrm(env, s, modrm, ot); break; case 0xee: /* rdpkru */ - if (prefixes & PREFIX_LOCK) { + if (s->prefix & (PREFIX_LOCK | PREFIX_DATA + | PREFIX_REPZ | PREFIX_REPNZ)) { goto illegal_op; } tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); @@ -6093,7 +3879,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64); break; case 0xef: /* wrpkru */ - if (prefixes & PREFIX_LOCK) { + if (s->prefix & (PREFIX_LOCK | PREFIX_DATA + | PREFIX_REPZ | PREFIX_REPNZ)) { goto illegal_op; } tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], @@ -6107,7 +3894,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) break; } gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ld_modrm(env, s, modrm, MO_16); /* * Only the 4 lower bits of CR0 are modified. * PE cannot be set to zero if already set to one. @@ -6169,72 +3956,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) /* nothing to do */ } break; - case 0x63: /* arpl or movslS (x86_64) */ -#ifdef TARGET_X86_64 - if (CODE64(s)) { - int d_ot; - /* d_ot is the size of destination */ - d_ot = dflag; - - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - - if (mod == 3) { - gen_op_mov_v_reg(s, MO_32, s->T0, rm); - /* sign extend */ - if (d_ot == MO_64) { - tcg_gen_ext32s_tl(s->T0, s->T0); - } - gen_op_mov_reg_v(s, d_ot, reg, s->T0); - } else { - gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, MO_32 | MO_SIGN, s->T0, s->A0); - gen_op_mov_reg_v(s, d_ot, reg, s->T0); - } - } else -#endif - { - TCGLabel *label1; - TCGv t0, t1, t2; - - if (!PE(s) || VM86(s)) - goto illegal_op; - t0 = tcg_temp_new(); - t1 = tcg_temp_new(); - t2 = tcg_temp_new(); - ot = MO_16; - modrm = x86_ldub_code(env, s); - reg = (modrm >> 3) & 7; - mod = (modrm >> 6) & 3; - rm = modrm & 7; - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, t0, s->A0); - } else { - gen_op_mov_v_reg(s, ot, t0, rm); - } - gen_op_mov_v_reg(s, ot, t1, reg); - tcg_gen_andi_tl(s->tmp0, t0, 3); - tcg_gen_andi_tl(t1, t1, 3); - tcg_gen_movi_tl(t2, 0); - label1 = gen_new_label(); - tcg_gen_brcond_tl(TCG_COND_GE, s->tmp0, t1, label1); - tcg_gen_andi_tl(t0, t0, ~3); - tcg_gen_or_tl(t0, t0, t1); - tcg_gen_movi_tl(t2, CC_Z); - gen_set_label(label1); - if (mod != 3) { - gen_op_st_v(s, ot, t0, s->A0); - } else { - gen_op_mov_reg_v(s, ot, rm, t0); - } - gen_compute_eflags(s); - tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); - tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); - } - break; case 0x102: /* lar */ case 0x103: /* lsl */ { @@ -6245,7 +3966,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) ot = dflag != MO_16 ? 
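The 16/32-bit half of the removed 0x63 case implements ARPL with a short TCG sequence; the same computation in plain C, purely for illustration:

#include <stdio.h>
#include <stdint.h>

/*
 * Raise the destination selector's RPL (bits 1:0) to the source's if it is
 * lower, and report whether an adjustment happened through ZF.
 */
static uint16_t arpl(uint16_t dest, uint16_t src, int *zf)
{
    if ((dest & 3) < (src & 3)) {
        dest = (dest & ~3) | (src & 3);
        *zf = 1;
    } else {
        *zf = 0;
    }
    return dest;
}

int main(void)
{
    int zf;
    uint16_t sel;

    sel = arpl(0x0008, 0x0013, &zf);    /* RPL 0 < RPL 3: adjusted to 0x000b */
    printf("arpl: sel=0x%04x zf=%d\n", sel, zf);
    sel = arpl(0x001f, 0x0010, &zf);    /* RPL 3 >= RPL 0: unchanged */
    printf("arpl: sel=0x%04x zf=%d\n", sel, zf);
    return 0;
}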
MO_32 : MO_16; modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); + gen_ld_modrm(env, s, modrm, MO_16); t0 = tcg_temp_new(); gen_update_cc_op(s); if (b == 0x102) { @@ -6261,25 +3982,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) set_cc_op(s, CC_OP_EFLAGS); } break; - case 0x118: - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - op = (modrm >> 3) & 7; - switch(op) { - case 0: /* prefetchnta */ - case 1: /* prefetchnt0 */ - case 2: /* prefetchnt0 */ - case 3: /* prefetchnt0 */ - if (mod == 3) - goto illegal_op; - gen_nop_modrm(env, s, modrm); - /* nothing more to do */ - break; - default: /* nop (multi byte) */ - gen_nop_modrm(env, s, modrm); - break; - } - break; case 0x11a: modrm = x86_ldub_code(env, s); if (s->flags & HF_MPX_EN_MASK) { @@ -6349,7 +4051,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } else { tcg_gen_movi_tl(s->A0, 0); } - gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->A0, a.def_seg, s->override); if (a.index >= 0) { tcg_gen_mov_tl(s->T0, cpu_regs[a.index]); } else { @@ -6454,7 +4156,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } else { tcg_gen_movi_tl(s->A0, 0); } - gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); + gen_lea_v_seg(s, s->A0, a.def_seg, s->override); if (a.index >= 0) { tcg_gen_mov_tl(s->T0, cpu_regs[a.index]); } else { @@ -6471,10 +4173,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } gen_nop_modrm(env, s, modrm); break; - case 0x119: case 0x11c ... 0x11f: /* nop (multi byte) */ - modrm = x86_ldub_code(env, s); - gen_nop_modrm(env, s, modrm); - break; case 0x120: /* mov reg, crN */ case 0x122: /* mov crN, reg */ @@ -6561,18 +4259,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } break; /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ - case 0x1c3: /* MOVNTI reg, mem */ - if (!(s->cpuid_features & CPUID_SSE2)) - goto illegal_op; - ot = mo_64_32(dflag); - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - if (mod == 3) - goto illegal_op; - reg = ((modrm >> 3) & 7) | REX_R(s); - /* generate a generic store */ - gen_ldst_modrm(env, s, modrm, ot, reg, 1); - break; case 0x1ae: modrm = x86_ldub_code(env, s); switch (modrm) { @@ -6733,17 +4419,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } goto unknown_op; - case 0xf8: /* sfence / pcommit */ - if (prefixes & PREFIX_DATA) { - /* pcommit */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_PCOMMIT) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - break; - } - /* fallthru */ - case 0xf9 ... 0xff: /* sfence */ + case 0xf8 ... 0xff: /* sfence */ if (!(s->cpuid_features & CPUID_SSE) || (prefixes & PREFIX_LOCK)) { goto illegal_op; @@ -6770,13 +4446,6 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) } break; - case 0x10d: /* 3DNow! 
prefetch(w) */ - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - if (mod == 3) - goto illegal_op; - gen_nop_modrm(env, s, modrm); - break; case 0x1aa: /* rsm */ gen_svm_check_intercept(s, SVM_EXIT_RSM); if (!(s->flags & HF_SMM_MASK)) @@ -6785,9 +4454,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) /* we should not be in SMM mode */ g_assert_not_reached(); #else - gen_update_cc_op(s); - gen_update_eip_next(s); gen_helper_rsm(tcg_env); + assume_cc_op(s, CC_OP_EFLAGS); #endif /* CONFIG_USER_ONLY */ s->base.is_jmp = DISAS_EOB_ONLY; break; @@ -6801,13 +4469,8 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); reg = ((modrm >> 3) & 7) | REX_R(s); - if (s->prefix & PREFIX_DATA) { - ot = MO_16; - } else { - ot = mo_64_32(dflag); - } - - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + ot = dflag; + gen_ld_modrm(env, s, modrm, ot); gen_extu(ot, s->T0); tcg_gen_mov_tl(cpu_cc_src, s->T0); tcg_gen_ctpop_tl(s->T0, s->T0); @@ -6815,28 +4478,21 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) set_cc_op(s, CC_OP_POPCNT); break; - case 0x10e ... 0x117: - case 0x128 ... 0x12f: - case 0x138 ... 0x13a: - case 0x150 ... 0x179: - case 0x17c ... 0x17f: - case 0x1c2: - case 0x1c4 ... 0x1c6: - case 0x1d0 ... 0x1fe: - disas_insn_new(s, cpu, b); - break; default: - goto unknown_op; + g_assert_not_reached(); } - return true; + return; illegal_op: gen_illegal_opcode(s); - return true; + return; unknown_op: gen_unknown_opcode(env, s); - return true; } +#include "decode-new.h" +#include "emit.c.inc" +#include "decode-new.c.inc" + void tcg_x86_init(void) { static const char reg_names[CPU_NB_REGS][4] = { @@ -6958,7 +4614,6 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->cc_op = CC_OP_DYNAMIC; dc->cc_op_dirty = false; - dc->popl_esp_hack = 0; /* select memory access functions */ dc->mem_index = cpu_mmu_index(cpu, false); dc->cpuid_features = env->features[FEAT_1_EDX]; @@ -6970,7 +4625,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX]; dc->cpuid_xsave_features = env->features[FEAT_XSAVE]; dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) || - (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); + (flags & (HF_RF_MASK | HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); /* * If jmp_opt, we want to handle each string instruction individually. * For icount also disable repz optimization so that each iteration @@ -7010,6 +4665,9 @@ static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); + bool orig_cc_op_dirty = dc->cc_op_dirty; + CCOp orig_cc_op = dc->cc_op; + target_ulong orig_pc_save = dc->pc_save; #ifdef TARGET_VSYSCALL_PAGE /* @@ -7022,23 +4680,51 @@ static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) } #endif - if (disas_insn(dc, cpu)) { - target_ulong pc_next = dc->pc; - dc->base.pc_next = pc_next; + switch (sigsetjmp(dc->jmpbuf, 0)) { + case 0: + disas_insn(dc, cpu); + break; + case 1: + gen_exception_gpf(dc); + break; + case 2: + /* Restore state that may affect the next instruction. */ + dc->pc = dc->base.pc_next; + /* + * TODO: These save/restore can be removed after the table-based + * decoder is complete; we will be decoding the insn completely + * before any code generation that might affect these variables. 
+ */ + dc->cc_op_dirty = orig_cc_op_dirty; + dc->cc_op = orig_cc_op; + dc->pc_save = orig_pc_save; + /* END TODO */ + dc->base.num_insns--; + tcg_remove_ops_after(dc->prev_insn_end); + dc->base.insn_start = dc->prev_insn_start; + dc->base.is_jmp = DISAS_TOO_MANY; + return; + default: + g_assert_not_reached(); + } - if (dc->base.is_jmp == DISAS_NEXT) { - if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { - /* - * If single step mode, we generate only one instruction and - * generate an exception. - * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear - * the flag and abort the translation to give the irqs a - * chance to happen. - */ - dc->base.is_jmp = DISAS_EOB_NEXT; - } else if (!is_same_page(&dc->base, pc_next)) { - dc->base.is_jmp = DISAS_TOO_MANY; - } + /* + * Instruction decoding completed (possibly with #GP if the + * 15-byte boundary was exceeded). + */ + dc->base.pc_next = dc->pc; + if (dc->base.is_jmp == DISAS_NEXT) { + if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { + /* + * If single step mode, we generate only one instruction and + * generate an exception. + * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear + * the flag and abort the translation to give the irqs a + * chance to happen. + */ + dc->base.is_jmp = DISAS_EOB_NEXT; + } else if (!is_same_page(&dc->base, dc->base.pc_next)) { + dc->base.is_jmp = DISAS_TOO_MANY; } } } @@ -7055,41 +4741,26 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) gen_jmp_rel_csize(dc, 0, 0); break; case DISAS_EOB_NEXT: - gen_update_cc_op(dc); + case DISAS_EOB_INHIBIT_IRQ: + assert(dc->base.pc_next == dc->pc); gen_update_eip_cur(dc); /* fall through */ case DISAS_EOB_ONLY: - gen_eob(dc); - break; - case DISAS_EOB_INHIBIT_IRQ: - gen_update_cc_op(dc); - gen_update_eip_cur(dc); - gen_eob_inhibit_irq(dc, true); - break; + case DISAS_EOB_RECHECK_TF: case DISAS_JUMP: - gen_jr(dc); + gen_eob(dc, dc->base.is_jmp); break; default: g_assert_not_reached(); } } -static void i386_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - DisasContext *dc = container_of(dcbase, DisasContext, base); - - fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); - target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); -} - static const TranslatorOps i386_tr_ops = { .init_disas_context = i386_tr_init_disas_context, .tb_start = i386_tr_tb_start, .insn_start = i386_tr_insn_start, .translate_insn = i386_tr_translate_insn, .tb_stop = i386_tr_tb_stop, - .disas_log = i386_tr_disas_log, }; /* generate intermediate code for basic block 'tb'. */ diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c index 31eec7048c..a6674a826d 100644 --- a/target/i386/whpx/whpx-all.c +++ b/target/i386/whpx/whpx-all.c @@ -237,6 +237,7 @@ struct AccelCPUState { uint64_t tpr; uint64_t apic_base; bool interruption_pending; + bool dirty; /* Must be the last field as it may have a tail */ WHV_RUN_VP_EXIT_CONTEXT exit_ctx; @@ -839,7 +840,7 @@ static HRESULT CALLBACK whpx_emu_setreg_callback( * The emulator just successfully wrote the register state. We clear the * dirty state so we avoid the double write on resume of the VP. */ - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; return hr; } @@ -1394,7 +1395,7 @@ static int whpx_last_vcpu_stopping(CPUState *cpu) /* Returns the address of the next instruction that is about to be executed. 
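The reworked i386_tr_translate_insn above drives instruction decode through sigsetjmp: return value 1 raises #GP, value 2 rolls back the partially translated instruction and retries it in a new block. A self-contained sketch of that pattern, independent of the QEMU data structures:

#include <stdio.h>
#include <setjmp.h>

static sigjmp_buf jmpbuf;

static void decode_one(int fault, int retry)
{
    if (fault) {
        siglongjmp(jmpbuf, 1);       /* e.g. insn crosses the 15-byte limit */
    }
    if (retry) {
        siglongjmp(jmpbuf, 2);       /* cannot finish in this block */
    }
    printf("decoded normally\n");
}

static void translate_one(int fault, int retry)
{
    switch (sigsetjmp(jmpbuf, 0)) {
    case 0:
        decode_one(fault, retry);
        break;
    case 1:
        printf("raise #GP\n");
        break;
    case 2:
        printf("undo partial work, retry in a new block\n");
        break;
    }
}

int main(void)
{
    translate_one(0, 0);
    translate_one(1, 0);
    translate_one(0, 1);
    return 0;
}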
*/ static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid) { - if (cpu->vcpu_dirty) { + if (cpu->accel->dirty) { /* The CPU registers have been modified by other parts of QEMU. */ return cpu_env(cpu)->eip; } else if (exit_context_valid) { @@ -1713,9 +1714,9 @@ static int whpx_vcpu_run(CPUState *cpu) } do { - if (cpu->vcpu_dirty) { + if (cpu->accel->dirty) { whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } if (exclusive_step_mode == WHPX_STEP_NONE) { @@ -2063,9 +2064,9 @@ static int whpx_vcpu_run(CPUState *cpu) static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) { - if (!cpu->vcpu_dirty) { + if (!cpu->accel->dirty) { whpx_get_registers(cpu); - cpu->vcpu_dirty = true; + cpu->accel->dirty = true; } } @@ -2073,20 +2074,20 @@ static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_RESET_STATE); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) { whpx_set_registers(cpu, WHPX_SET_FULL_STATE); - cpu->vcpu_dirty = false; + cpu->accel->dirty = false; } static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) { - cpu->vcpu_dirty = true; + cpu->accel->dirty = true; } /* @@ -2095,7 +2096,7 @@ static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, void whpx_cpu_synchronize_state(CPUState *cpu) { - if (!cpu->vcpu_dirty) { + if (!cpu->accel->dirty) { run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); } } @@ -2235,7 +2236,7 @@ int whpx_init_vcpu(CPUState *cpu) } vcpu->interruptable = true; - cpu->vcpu_dirty = true; + vcpu->dirty = true; cpu->accel = vcpu; max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); qemu_add_vm_change_state_handler(whpx_cpu_update_state, env); diff --git a/target/loongarch/cpu-param.h b/target/loongarch/cpu-param.h index cfe195db4e..db5ad1c69f 100644 --- a/target/loongarch/cpu-param.h +++ b/target/loongarch/cpu-param.h @@ -14,4 +14,6 @@ #define TARGET_PAGE_BITS 12 +#define TCG_GUEST_DEFAULT_MO (0) + #endif diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c index bac84dca7a..b5c1ec94af 100644 --- a/target/loongarch/cpu.c +++ b/target/loongarch/cpu.c @@ -92,7 +92,7 @@ void G_NORETURN do_raise_exception(CPULoongArchState *env, { CPUState *cs = env_cpu(env); - qemu_log_mask(CPU_LOG_INT, "%s: expection: %d (%s)\n", + qemu_log_mask(CPU_LOG_INT, "%s: exception: %d (%s)\n", __func__, exception, loongarch_exception_name(exception)); @@ -336,7 +336,7 @@ static bool loongarch_cpu_exec_interrupt(CPUState *cs, int interrupt_request) static void loongarch_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); set_pc(cpu_env(cs), tb->pc); } @@ -505,7 +505,9 @@ static void loongarch_cpu_reset_hold(Object *obj, ResetType type) lacc->parent_phases.hold(obj, type); } +#ifdef CONFIG_TCG env->fcsr0_mask = FCSR0_M1 | FCSR0_M2 | FCSR0_M3; +#endif env->fcsr0 = 0x0; int n; @@ -550,7 +552,9 @@ static void loongarch_cpu_reset_hold(Object *obj, ResetType type) #ifndef CONFIG_USER_ONLY env->pc = 0x1c000000; +#ifdef CONFIG_TCG memset(env->tlb, 0, sizeof(env->tlb)); +#endif if (kvm_enabled()) { kvm_arch_reset_vcpu(env); } @@ -641,16 +645,10 @@ static void loongarch_set_lasx(Object *obj, bool value, Error **errp) void loongarch_cpu_post_init(Object *obj) { - LoongArchCPU *cpu = LOONGARCH_CPU(obj); - 
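The WHPX hunks above move the per-vCPU dirty flag into the accelerator-private AccelCPUState; the flag itself keeps implementing the usual lazy register synchronisation. A minimal sketch of that protocol, with hypothetical get/set helpers standing in for whpx_get_registers/whpx_set_registers:

#include <stdio.h>
#include <stdbool.h>

struct vcpu {
    bool dirty;                     /* QEMU's cached copy differs from the VP */
    int regs;                       /* placeholder register state */
};

static void get_registers(struct vcpu *v)
{
    printf("fetch registers from hypervisor (regs=%d)\n", v->regs);
}

static void set_registers(const struct vcpu *v)
{
    printf("push registers to hypervisor (regs=%d)\n", v->regs);
}

static void synchronize_state(struct vcpu *v)
{
    if (!v->dirty) {                /* pull state once, then allow QEMU to edit it */
        get_registers(v);
        v->dirty = true;
    }
}

static void vcpu_run(struct vcpu *v)
{
    if (v->dirty) {                 /* flush QEMU-side modifications before running */
        set_registers(v);
        v->dirty = false;
    }
    printf("run vcpu\n");
}

int main(void)
{
    struct vcpu v = { .dirty = true, .regs = 0 };   /* fresh vcpu: push full state once */

    vcpu_run(&v);
    synchronize_state(&v);
    v.regs = 42;                    /* guest state edited on the QEMU side */
    vcpu_run(&v);
    return 0;
}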
- if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { - object_property_add_bool(obj, "lsx", loongarch_get_lsx, - loongarch_set_lsx); - } - if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { - object_property_add_bool(obj, "lasx", loongarch_get_lasx, - loongarch_set_lasx); - } + object_property_add_bool(obj, "lsx", loongarch_get_lsx, + loongarch_set_lsx); + object_property_add_bool(obj, "lasx", loongarch_get_lasx, + loongarch_set_lasx); } static void loongarch_cpu_init(Object *obj) @@ -686,8 +684,7 @@ void loongarch_cpu_dump_state(CPUState *cs, FILE *f, int flags) int i; qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc); - qemu_fprintf(f, " FCSR0 0x%08x fp_status 0x%02x\n", env->fcsr0, - get_float_exception_flags(&env->fp_status)); + qemu_fprintf(f, " FCSR0 0x%08x\n", env->fcsr0); /* gpr */ for (i = 0; i < 32; i++) { diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h index ec37579fd6..41b8e6d96d 100644 --- a/target/loongarch/cpu.h +++ b/target/loongarch/cpu.h @@ -39,8 +39,6 @@ #define IOCSR_MEM_SIZE 0x428 -#define TCG_GUEST_DEFAULT_MO (0) - #define FCSR0_M1 0x1f /* FCSR1 mask, Enables */ #define FCSR0_M2 0x1f1f0000 /* FCSR2 mask, Cause and Flags */ #define FCSR0_M3 0x300 /* FCSR3 mask, Round Mode */ @@ -272,6 +270,7 @@ union fpr_t { VReg vreg; }; +#ifdef CONFIG_TCG struct LoongArchTLB { uint64_t tlb_misc; /* Fields corresponding to CSR_TLBELO0/1 */ @@ -279,23 +278,18 @@ struct LoongArchTLB { uint64_t tlb_entry1; }; typedef struct LoongArchTLB LoongArchTLB; +#endif typedef struct CPUArchState { uint64_t gpr[32]; uint64_t pc; fpr_t fpr[32]; - float_status fp_status; bool cf[8]; - uint32_t fcsr0; - uint32_t fcsr0_mask; uint32_t cpucfg[21]; - uint64_t lladdr; /* LL virtual address compared against SC */ - uint64_t llval; - /* LoongArch CSRs */ uint64_t CSR_CRMD; uint64_t CSR_PRMD; @@ -352,8 +346,16 @@ typedef struct CPUArchState { uint64_t CSR_DERA; uint64_t CSR_DSAVE; +#ifdef CONFIG_TCG + float_status fp_status; + uint32_t fcsr0_mask; + uint64_t lladdr; /* LL virtual address compared against SC */ + uint64_t llval; +#endif #ifndef CONFIG_USER_ONLY +#ifdef CONFIG_TCG LoongArchTLB tlb[LOONGARCH_TLB_MAX]; +#endif AddressSpace *address_space_iocsr; bool load_elf; @@ -361,6 +363,8 @@ typedef struct CPUArchState { uint32_t mp_state; /* Store ipistate to access from this struct */ DeviceState *ipistate; + + struct loongarch_boot_info *boot_info; #endif } CPULoongArchState; diff --git a/target/loongarch/cpu_helper.c b/target/loongarch/cpu_helper.c index 960eec9567..580362ac3e 100644 --- a/target/loongarch/cpu_helper.c +++ b/target/loongarch/cpu_helper.c @@ -11,6 +11,7 @@ #include "internals.h" #include "cpu-csr.h" +#ifdef CONFIG_TCG static int loongarch_map_tlb_entry(CPULoongArchState *env, hwaddr *physical, int *prot, target_ulong address, int access_type, int index, int mmu_idx) @@ -154,6 +155,14 @@ static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, return TLBRET_NOMATCH; } +#else +static int loongarch_map_address(CPULoongArchState *env, hwaddr *physical, + int *prot, target_ulong address, + MMUAccessType access_type, int mmu_idx) +{ + return TLBRET_NOMATCH; +} +#endif static hwaddr dmw_va2pa(CPULoongArchState *env, target_ulong va, target_ulong dmw) diff --git a/target/loongarch/kvm/kvm.c b/target/loongarch/kvm/kvm.c index 8224d94333..8e6e27c8bf 100644 --- a/target/loongarch/kvm/kvm.c +++ b/target/loongarch/kvm/kvm.c @@ -436,6 +436,9 @@ static int kvm_loongarch_get_regs_fp(CPUState *cs) env->fcsr0 = fpu.fcsr; for (i = 0; i < 32; i++) { env->fpr[i].vreg.UD[0] = 
fpu.fpr[i].val64[0]; + env->fpr[i].vreg.UD[1] = fpu.fpr[i].val64[1]; + env->fpr[i].vreg.UD[2] = fpu.fpr[i].val64[2]; + env->fpr[i].vreg.UD[3] = fpu.fpr[i].val64[3]; } for (i = 0; i < 8; i++) { env->cf[i] = fpu.fcc & 0xFF; @@ -455,6 +458,9 @@ static int kvm_loongarch_put_regs_fp(CPUState *cs) fpu.fcc = 0; for (i = 0; i < 32; i++) { fpu.fpr[i].val64[0] = env->fpr[i].vreg.UD[0]; + fpu.fpr[i].val64[1] = env->fpr[i].vreg.UD[1]; + fpu.fpr[i].val64[2] = env->fpr[i].vreg.UD[2]; + fpu.fpr[i].val64[3] = env->fpr[i].vreg.UD[3]; } for (i = 0; i < 8; i++) { @@ -587,22 +593,22 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } - ret = kvm_loongarch_get_csr(cs); + ret = kvm_loongarch_get_cpucfg(cs); if (ret) { return ret; } - ret = kvm_loongarch_get_regs_fp(cs); + ret = kvm_loongarch_get_csr(cs); if (ret) { return ret; } - ret = kvm_loongarch_get_mpstate(cs); + ret = kvm_loongarch_get_regs_fp(cs); if (ret) { return ret; } - ret = kvm_loongarch_get_cpucfg(cs); + ret = kvm_loongarch_get_mpstate(cs); return ret; } @@ -615,22 +621,22 @@ int kvm_arch_put_registers(CPUState *cs, int level) return ret; } - ret = kvm_loongarch_put_csr(cs, level); + ret = kvm_loongarch_put_cpucfg(cs); if (ret) { return ret; } - ret = kvm_loongarch_put_regs_fp(cs); + ret = kvm_loongarch_put_csr(cs, level); if (ret) { return ret; } - ret = kvm_loongarch_put_mpstate(cs); + ret = kvm_loongarch_put_regs_fp(cs); if (ret) { return ret; } - ret = kvm_loongarch_put_cpucfg(cs); + ret = kvm_loongarch_put_mpstate(cs); return ret; } diff --git a/target/loongarch/machine.c b/target/loongarch/machine.c index c7029fb9b4..08a7fa5370 100644 --- a/target/loongarch/machine.c +++ b/target/loongarch/machine.c @@ -8,6 +8,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "migration/cpu.h" +#include "sysemu/tcg.h" #include "vec.h" static const VMStateDescription vmstate_fpu_reg = { @@ -109,9 +110,15 @@ static const VMStateDescription vmstate_lasx = { }, }; +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) +static bool tlb_needed(void *opaque) +{ + return tcg_enabled(); +} + /* TLB state */ -const VMStateDescription vmstate_tlb = { - .name = "cpu/tlb", +static const VMStateDescription vmstate_tlb_entry = { + .name = "cpu/tlb_entry", .version_id = 0, .minimum_version_id = 0, .fields = (const VMStateField[]) { @@ -122,11 +129,24 @@ const VMStateDescription vmstate_tlb = { } }; +static const VMStateDescription vmstate_tlb = { + .name = "cpu/tlb", + .version_id = 0, + .minimum_version_id = 0, + .needed = tlb_needed, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX, + 0, vmstate_tlb_entry, LoongArchTLB), + VMSTATE_END_OF_LIST() + } +}; +#endif + /* LoongArch CPU state */ const VMStateDescription vmstate_loongarch_cpu = { .name = "cpu", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (const VMStateField[]) { VMSTATE_UINTTL_ARRAY(env.gpr, LoongArchCPU, 32), VMSTATE_UINTTL(env.pc, LoongArchCPU), @@ -187,9 +207,8 @@ const VMStateDescription vmstate_loongarch_cpu = { VMSTATE_UINT64(env.CSR_DBG, LoongArchCPU), VMSTATE_UINT64(env.CSR_DERA, LoongArchCPU), VMSTATE_UINT64(env.CSR_DSAVE, LoongArchCPU), - /* TLB */ - VMSTATE_STRUCT_ARRAY(env.tlb, LoongArchCPU, LOONGARCH_TLB_MAX, - 0, vmstate_tlb, LoongArchTLB), + + VMSTATE_UINT64(kvm_state_counter, LoongArchCPU), VMSTATE_END_OF_LIST() }, @@ -197,6 +216,9 @@ const VMStateDescription vmstate_loongarch_cpu = { &vmstate_fpu, &vmstate_lsx, &vmstate_lasx, +#if defined(CONFIG_TCG) && 
!defined(CONFIG_USER_ONLY) + &vmstate_tlb, +#endif NULL } }; diff --git a/target/loongarch/tcg/tlb_helper.c b/target/loongarch/tcg/tlb_helper.c index 57f5308632..d6331f9b0b 100644 --- a/target/loongarch/tcg/tlb_helper.c +++ b/target/loongarch/tcg/tlb_helper.c @@ -13,6 +13,7 @@ #include "internals.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/cpu_ldst.h" #include "exec/log.h" #include "cpu-csr.h" diff --git a/target/loongarch/tcg/translate.c b/target/loongarch/tcg/translate.c index 7567712655..1fca4afc73 100644 --- a/target/loongarch/tcg/translate.c +++ b/target/loongarch/tcg/translate.c @@ -325,20 +325,12 @@ static void loongarch_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void loongarch_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps loongarch_tr_ops = { .init_disas_context = loongarch_tr_init_disas_context, .tb_start = loongarch_tr_tb_start, .insn_start = loongarch_tr_insn_start, .translate_insn = loongarch_tr_translate_insn, .tb_stop = loongarch_tr_tb_stop, - .disas_log = loongarch_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/m68k/helper.c b/target/m68k/helper.c index 7a91f33b17..7967ad13cb 100644 --- a/target/m68k/helper.c +++ b/target/m68k/helper.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/gdbstub.h" #include "exec/helper-proto.h" #include "gdbstub/helpers.h" diff --git a/target/m68k/translate.c b/target/m68k/translate.c index 169927552a..445966fb6a 100644 --- a/target/m68k/translate.c +++ b/target/m68k/translate.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "qemu/log.h" @@ -6105,20 +6104,12 @@ static void m68k_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void m68k_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps m68k_tr_ops = { .init_disas_context = m68k_tr_init_disas_context, .tb_start = m68k_tr_tb_start, .insn_start = m68k_tr_insn_start, .translate_insn = m68k_tr_translate_insn, .tb_stop = m68k_tr_tb_stop, - .disas_log = m68k_tr_disas_log, }; void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, diff --git a/target/meson.build b/target/meson.build index 59b46b2ef4..1c2e6f2b19 100644 --- a/target/meson.build +++ b/target/meson.build @@ -18,5 +18,3 @@ subdir('sh4') subdir('sparc') subdir('tricore') subdir('xtensa') - -specific_ss.add(files('target-common.c')) diff --git a/target/microblaze/Kconfig b/target/microblaze/Kconfig index a5410d9218..e91d58d88f 100644 --- a/target/microblaze/Kconfig +++ b/target/microblaze/Kconfig @@ -1,2 +1,3 @@ config MICROBLAZE bool + select DEVICE_TREE # needed by boot.c diff --git a/target/microblaze/cpu-param.h b/target/microblaze/cpu-param.h index 9770b0eb52..e530fead1c 100644 --- a/target/microblaze/cpu-param.h +++ b/target/microblaze/cpu-param.h @@ -29,4 +29,7 @@ /* FIXME: MB uses variable pages down to 1K but linux only uses 4k. 
*/ #define TARGET_PAGE_BITS 12 +/* MicroBlaze is always in-order. */ +#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL + #endif diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index f8dc3173fc..41ad47d04c 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -28,6 +28,7 @@ #include "qemu/module.h" #include "hw/qdev-properties.h" #include "exec/exec-all.h" +#include "exec/cpu_ldst.h" #include "exec/gdbstub.h" #include "fpu/softfloat-helpers.h" #include "tcg/tcg.h" @@ -98,7 +99,7 @@ static void mb_cpu_synchronize_from_tb(CPUState *cs, { MicroBlazeCPU *cpu = MICROBLAZE_CPU(cs); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu->env.pc = tb->pc; cpu->env.iflags = tb->flags & IFLAGS_TB_MASK; } diff --git a/target/microblaze/cpu.h b/target/microblaze/cpu.h index c0c7574dbd..3e5a3e5c60 100644 --- a/target/microblaze/cpu.h +++ b/target/microblaze/cpu.h @@ -24,9 +24,6 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" -/* MicroBlaze is always in-order. */ -#define TCG_GUEST_DEFAULT_MO TCG_MO_ALL - typedef struct CPUArchState CPUMBState; #if !defined(CONFIG_USER_ONLY) #include "mmu.h" diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index d25c9eb4d3..5d3259ce31 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "qemu/host-utils.h" #include "exec/log.h" @@ -51,7 +52,7 @@ bool mb_cpu_tlb_fill(CPUState *cs, vaddr address, int size, if (mmu_idx == MMU_NOMMU_IDX) { /* MMU disabled or not available. */ address &= TARGET_PAGE_MASK; - prot = PAGE_BITS; + prot = PAGE_RWX; tlb_set_page_with_attrs(cs, address, address, attrs, prot, mmu_idx, TARGET_PAGE_SIZE); return true; diff --git a/target/microblaze/mmu.c b/target/microblaze/mmu.c index 234006634e..2423ac6172 100644 --- a/target/microblaze/mmu.c +++ b/target/microblaze/mmu.c @@ -22,6 +22,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" static unsigned int tlb_decode_size(unsigned int f) { diff --git a/target/microblaze/translate.c b/target/microblaze/translate.c index fc451befae..4beaf69e76 100644 --- a/target/microblaze/translate.c +++ b/target/microblaze/translate.c @@ -20,8 +20,8 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/exec-all.h" +#include "exec/cpu_ldst.h" #include "tcg/tcg-op.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" @@ -1636,7 +1636,7 @@ static void mb_tr_translate_insn(DisasContextBase *dcb, CPUState *cs) dc->tb_flags_to_set = 0; - ir = cpu_ldl_code(cpu_env(cs), dc->base.pc_next); + ir = translator_ldl(cpu_env(cs), &dc->base, dc->base.pc_next); if (!decode(dc, ir)) { trap_illegal(dc, true); } @@ -1770,20 +1770,12 @@ static void mb_tr_tb_stop(DisasContextBase *dcb, CPUState *cs) } } -static void mb_tr_disas_log(const DisasContextBase *dcb, - CPUState *cs, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcb->pc_first)); - target_disas(logfile, cs, dcb->pc_first, dcb->tb->size); -} - static const TranslatorOps mb_tr_ops = { .init_disas_context = mb_tr_init_disas_context, .tb_start = mb_tr_tb_start, .insn_start = mb_tr_insn_start, .translate_insn = mb_tr_translate_insn, .tb_stop = mb_tr_tb_stop, - .disas_log = mb_tr_disas_log, }; void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, diff --git a/target/mips/cpu-param.h 
b/target/mips/cpu-param.h index 594c91a156..6f6ac1688f 100644 --- a/target/mips/cpu-param.h +++ b/target/mips/cpu-param.h @@ -30,4 +30,6 @@ #define TARGET_PAGE_BITS_MIN 12 #endif +#define TCG_GUEST_DEFAULT_MO (0) + #endif diff --git a/target/mips/cpu.h b/target/mips/cpu.h index 7329226d39..3e906a175a 100644 --- a/target/mips/cpu.h +++ b/target/mips/cpu.h @@ -10,8 +10,6 @@ #include "hw/clock.h" #include "mips-defs.h" -#define TCG_GUEST_DEFAULT_MO (0) - typedef struct CPUMIPSTLBContext CPUMIPSTLBContext; /* MSA Context */ diff --git a/target/mips/sysemu/physaddr.c b/target/mips/sysemu/physaddr.c index 5c5184e136..505781d84c 100644 --- a/target/mips/sysemu/physaddr.c +++ b/target/mips/sysemu/physaddr.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "../internal.h" static int is_seg_am_mapped(unsigned int am, bool eu, int mmu_idx) diff --git a/target/mips/tcg/exception.c b/target/mips/tcg/exception.c index 13275d1ded..4886d087b2 100644 --- a/target/mips/tcg/exception.c +++ b/target/mips/tcg/exception.c @@ -81,7 +81,7 @@ void mips_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { CPUMIPSState *env = cpu_env(cs); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); env->active_tc.PC = tb->pc; env->hflags &= ~MIPS_HFLAG_BMASK; env->hflags |= tb->flags & MIPS_HFLAG_BMASK; diff --git a/target/mips/tcg/sysemu/special_helper.c b/target/mips/tcg/sysemu/special_helper.c index 5baa25348e..9ce5e2ceac 100644 --- a/target/mips/tcg/sysemu/special_helper.c +++ b/target/mips/tcg/sysemu/special_helper.c @@ -93,7 +93,7 @@ bool mips_io_recompile_replay_branch(CPUState *cs, const TranslationBlock *tb) CPUMIPSState *env = cpu_env(cs); if ((env->hflags & MIPS_HFLAG_BMASK) != 0 - && !(cs->tcg_cflags & CF_PCREL) && env->active_tc.PC != tb->pc) { + && !tcg_cflags_has(cs, CF_PCREL) && env->active_tc.PC != tb->pc) { env->active_tc.PC -= (env->hflags & MIPS_HFLAG_B16 ? 
2 : 4); env->hflags &= ~MIPS_HFLAG_BMASK; return true; diff --git a/target/mips/tcg/sysemu/tlb_helper.c b/target/mips/tcg/sysemu/tlb_helper.c index 119eae771e..3ba6d369a6 100644 --- a/target/mips/tcg/sysemu/tlb_helper.c +++ b/target/mips/tcg/sysemu/tlb_helper.c @@ -22,6 +22,7 @@ #include "cpu.h" #include "internal.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/cpu_ldst.h" #include "exec/log.h" #include "exec/helper-proto.h" diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index 06c108cc9c..333469b268 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -29,7 +29,6 @@ #include "exec/translation-block.h" #include "semihosting/semihost.h" #include "trace.h" -#include "disas/disas.h" #include "fpu_helper.h" #define HELPER_H "helper.h" @@ -15475,20 +15474,12 @@ static void mips_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void mips_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cs, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cs, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps mips_tr_ops = { .init_disas_context = mips_tr_init_disas_context, .tb_start = mips_tr_tb_start, .insn_start = mips_tr_insn_start, .translate_insn = mips_tr_translate_insn, .tb_stop = mips_tr_tb_stop, - .disas_log = mips_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/openrisc/Kconfig b/target/openrisc/Kconfig index e0da4ac1df..cd66c2e3b6 100644 --- a/target/openrisc/Kconfig +++ b/target/openrisc/Kconfig @@ -1,2 +1,3 @@ config OPENRISC bool + select DEVICE_TREE # needed by boot.c diff --git a/target/openrisc/cpu-param.h b/target/openrisc/cpu-param.h index 3f08207485..fbfc0f568b 100644 --- a/target/openrisc/cpu-param.h +++ b/target/openrisc/cpu-param.h @@ -13,4 +13,6 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 32 #define TARGET_VIRT_ADDR_SPACE_BITS 32 +#define TCG_GUEST_DEFAULT_MO (0) + #endif diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index d711035cf5..fdaaa09fc8 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -45,7 +45,7 @@ static void openrisc_cpu_synchronize_from_tb(CPUState *cs, { OpenRISCCPU *cpu = OPENRISC_CPU(cs); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu->env.pc = tb->pc; } diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h index b1b7db5cbd..c9fe9ae12d 100644 --- a/target/openrisc/cpu.h +++ b/target/openrisc/cpu.h @@ -24,8 +24,6 @@ #include "exec/cpu-defs.h" #include "fpu/softfloat-types.h" -#define TCG_GUEST_DEFAULT_MO (0) - /** * OpenRISCCPUClass: * @parent_realize: The parent class' realize handler. 
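The openrisc, mips, loongarch and ppc hunks in this series repeat one refactoring: the TCG_GUEST_DEFAULT_MO definition moves out of each target's cpu.h and into its cpu-param.h, and open-coded tests of cs->tcg_cflags are replaced by the tcg_cflags_has() accessor. As a rough illustration only, using a hypothetical target named "foo" with made-up address-space widths (not any file touched by these patches), the end state of such a cpu-param.h looks roughly like:

    /* target/foo/cpu-param.h: hypothetical sketch, not part of this series */
    #ifndef FOO_CPU_PARAM_H
    #define FOO_CPU_PARAM_H

    #define TARGET_PAGE_BITS            12
    #define TARGET_PHYS_ADDR_SPACE_BITS 32
    #define TARGET_VIRT_ADDR_SPACE_BITS 32

    /* Weakly ordered guest: no implicit memory-ordering constraints. */
    #define TCG_GUEST_DEFAULT_MO (0)

    #endif

Keeping the macro next to the other per-target compile-time parameters presumably lets code that only needs cpu-param.h see the memory-ordering default without pulling in the full cpu.h.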
diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c index 603c26715e..c632d5230b 100644 --- a/target/openrisc/mmu.c +++ b/target/openrisc/mmu.c @@ -22,6 +22,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "gdbstub/helpers.h" #include "qemu/host-utils.h" #include "hw/loader.h" diff --git a/target/openrisc/translate.c b/target/openrisc/translate.c index 23fff46084..ca566847cb 100644 --- a/target/openrisc/translate.c +++ b/target/openrisc/translate.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" -#include "disas/disas.h" #include "tcg/tcg-op.h" #include "qemu/log.h" #include "qemu/bitops.h" @@ -1638,22 +1637,12 @@ static void openrisc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void openrisc_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cs, FILE *logfile) -{ - DisasContext *s = container_of(dcbase, DisasContext, base); - - fprintf(logfile, "IN: %s\n", lookup_symbol(s->base.pc_first)); - target_disas(logfile, cs, s->base.pc_first, s->base.tb->size); -} - static const TranslatorOps openrisc_tr_ops = { .init_disas_context = openrisc_tr_init_disas_context, .tb_start = openrisc_tr_tb_start, .insn_start = openrisc_tr_insn_start, .translate_insn = openrisc_tr_translate_insn, .tb_stop = openrisc_tr_tb_stop, - .disas_log = openrisc_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/ppc/Kconfig b/target/ppc/Kconfig index 3ff152051a..0283711673 100644 --- a/target/ppc/Kconfig +++ b/target/ppc/Kconfig @@ -3,3 +3,4 @@ config PPC config PPC64 bool + select PPC diff --git a/target/ppc/cpu-param.h b/target/ppc/cpu-param.h index b7ad52de03..77c5ed9a67 100644 --- a/target/ppc/cpu-param.h +++ b/target/ppc/cpu-param.h @@ -40,4 +40,6 @@ # define TARGET_PAGE_BITS 12 #endif +#define TCG_GUEST_DEFAULT_MO 0 + #endif diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h index 67e6b2effd..2015e603d4 100644 --- a/target/ppc/cpu.h +++ b/target/ppc/cpu.h @@ -29,8 +29,6 @@ #define CPU_RESOLVING_TYPE TYPE_POWERPC_CPU -#define TCG_GUEST_DEFAULT_MO 0 - #define TARGET_PAGE_BITS_64K 16 #define TARGET_PAGE_BITS_16M 24 @@ -535,6 +533,9 @@ FIELD(MSR, LE, MSR_LE, 1) #define MMCR0_FC56 PPC_BIT(59) /* PMC Freeze Counters 5-6 bit */ #define MMCR0_PMC1CE PPC_BIT(48) /* MMCR0 PMC1 Condition Enabled */ #define MMCR0_PMCjCE PPC_BIT(49) /* MMCR0 PMCj Condition Enabled */ +#define MMCR0_FCP PPC_BIT(34) /* Freeze Counters/BHRB if PR=1 */ +#define MMCR0_FCPC PPC_BIT(51) /* Condition for FCP bit */ +#define MMCR0_BHRBA_NR PPC_BIT_NR(42) /* BHRB Available */ /* MMCR0 userspace r/w mask */ #define MMCR0_UREG_MASK (MMCR0_FC | MMCR0_PMAO | MMCR0_PMAE) /* MMCR2 userspace r/w mask */ @@ -547,6 +548,10 @@ FIELD(MSR, LE, MSR_LE, 1) #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \ MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0) +#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording Disable */ +#define MMCRA_IFM_MASK PPC_BITMASK(32, 33) /* BHRB Instruction Filtering */ +#define MMCRA_IFM_SHIFT PPC_BIT_NR(33) + #define MMCR1_EVT_SIZE 8 /* extract64() does a right shift before extracting */ #define MMCR1_PMC1SEL_START 32 @@ -630,6 +635,7 @@ FIELD(MSR, LE, MSR_LE, 1) /* HFSCR bits */ #define HFSCR_MSGP PPC_BIT(53) /* Privileged Message Send Facilities */ +#define HFSCR_BHRB PPC_BIT(59) /* BHRB Instructions */ #define HFSCR_IC_MSGP 0xA #define DBCR0_ICMP (1 << 27) @@ -772,6 +778,8 @@ enum { POWERPC_FLAG_SMT = 0x00400000, /* Using "LPAR per core" 
mode (as opposed to per-thread) */ POWERPC_FLAG_SMT_1LPAR = 0x00800000, + /* Has BHRB */ + POWERPC_FLAG_BHRB = 0x01000000, }; /* @@ -799,6 +807,7 @@ enum { HFLAGS_PMCJCE = 17, /* MMCR0 PMCjCE bit */ HFLAGS_PMC_OTHER = 18, /* PMC other than PMC5-6 is enabled */ HFLAGS_INSN_CNT = 19, /* PMU instruction count enabled */ + HFLAGS_BHRB_ENABLE = 20, /* Summary flag for enabling BHRB */ HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */ HFLAGS_VR = 25, /* MSR_VR if cpu has VRE */ @@ -1154,7 +1163,11 @@ FIELD(FPSCR, FI, FPSCR_FI, 1) #define DBELL_TYPE_DBELL_SERVER (0x05 << DBELL_TYPE_SHIFT) -#define DBELL_BRDCAST PPC_BIT(37) +#define DBELL_BRDCAST_MASK PPC_BITMASK(37, 38) +#define DBELL_BRDCAST_SHIFT 25 +#define DBELL_BRDCAST_SUBPROC (0x1 << DBELL_BRDCAST_SHIFT) +#define DBELL_BRDCAST_CORE (0x2 << DBELL_BRDCAST_SHIFT) + #define DBELL_LPIDTAG_SHIFT 14 #define DBELL_LPIDTAG_MASK (0xfff << DBELL_LPIDTAG_SHIFT) #define DBELL_PIRTAG_MASK 0x3fff @@ -1212,6 +1225,9 @@ struct pnv_tod_tbst { #define PPC_CPU_OPCODES_LEN 0x40 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20 +#define BHRB_MAX_NUM_ENTRIES_LOG2 (5) +#define BHRB_MAX_NUM_ENTRIES (1 << BHRB_MAX_NUM_ENTRIES_LOG2) + struct CPUArchState { /* Most commonly used resources during translated code execution first */ target_ulong gpr[32]; /* general purpose registers */ @@ -1252,6 +1268,9 @@ struct CPUArchState { ppc_slb_t slb[MAX_SLB_ENTRIES]; /* PowerPC 64 SLB area */ struct CPUBreakpoint *ciabr_breakpoint; struct CPUWatchpoint *dawr0_watchpoint; + + /* POWER CPU regs/state */ + target_ulong scratch[8]; /* SCRATCH registers (shared across core) */ #endif target_ulong sr[32]; /* segment registers */ uint32_t nb_BATs; /* number of BATs */ @@ -1262,7 +1281,6 @@ struct CPUArchState { int tlb_per_way; /* Speed-up helper: used to avoid divisions at run time */ int nb_ways; /* Number of ways in the TLB set */ int last_way; /* Last used way used to allocate TLB in a LRU way */ - int id_tlbs; /* If 1, MMU has separated TLBs for instructions & data */ int nb_pids; /* Number of available PID registers */ int tlb_type; /* Type of TLB we're dealing with */ ppc_tlb_t tlb; /* TLB is optional. 
Allocate them only if needed */ @@ -1308,6 +1326,16 @@ struct CPUArchState { int dcache_line_size; int icache_line_size; +#ifdef TARGET_PPC64 + /* Branch History Rolling Buffer (BHRB) resources */ + target_ulong bhrb_num_entries; + intptr_t bhrb_base; + target_ulong bhrb_filter; + target_ulong bhrb_offset; + target_ulong bhrb_offset_mask; + uint64_t bhrb[BHRB_MAX_NUM_ENTRIES]; +#endif + /* These resources are used during exception processing */ /* CPU model definition */ target_ulong msr_mask; @@ -1353,6 +1381,9 @@ struct CPUArchState { /* Power management */ int (*check_pow)(CPUPPCState *env); + /* attn instruction enable */ + int (*check_attn)(CPUPPCState *env); + #if !defined(CONFIG_USER_ONLY) void *load_info; /* holds boot loading state */ #endif @@ -1437,6 +1468,7 @@ struct ArchCPU { int vcpu_id; uint32_t compat_pvr; PPCVirtualHypervisor *vhyp; + PPCVirtualHypervisorClass *vhyp_class; void *machine_data; int32_t node_id; /* NUMA node this CPU belongs to */ PPCHash64Options *hash64_opts; @@ -1500,6 +1532,7 @@ struct PowerPCCPUClass { int n_host_threads; void (*init_proc)(CPUPPCState *env); int (*check_pow)(CPUPPCState *env); + int (*check_attn)(CPUPPCState *env); }; ObjectClass *ppc_cpu_class_by_name(const char *name); @@ -1534,7 +1567,7 @@ DECLARE_OBJ_CHECKERS(PPCVirtualHypervisor, PPCVirtualHypervisorClass, static inline bool vhyp_cpu_in_nested(PowerPCCPU *cpu) { - return PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp)->cpu_in_nested(cpu); + return cpu->vhyp_class->cpu_in_nested(cpu); } #endif /* CONFIG_USER_ONLY */ @@ -1609,10 +1642,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env); void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr); void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp); void cpu_ppc_set_1lpar(PowerPCCPU *cpu); -int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, hwaddr *raddrp, - target_ulong address, uint32_t pid); -int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid); -hwaddr booke206_tlb_to_page_size(CPUPPCState *env, ppcmas_tlb_t *tlb); #endif void ppc_store_fpscr(CPUPPCState *env, target_ulong val); @@ -1779,9 +1808,9 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_SPRG2 (0x112) #define SPR_SPRG3 (0x113) #define SPR_SPRG4 (0x114) -#define SPR_SCOMC (0x114) +#define SPR_POWER_SPRC (0x114) #define SPR_SPRG5 (0x115) -#define SPR_SCOMD (0x115) +#define SPR_POWER_SPRD (0x115) #define SPR_SPRG6 (0x116) #define SPR_SPRG7 (0x117) #define SPR_ASR (0x118) @@ -2071,6 +2100,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_DEXCR (0x33C) #define SPR_IC (0x350) #define SPR_VTB (0x351) +#define SPR_LDBAR (0x352) #define SPR_MMCRC (0x353) #define SPR_PSSCR (0x357) #define SPR_440_INV0 (0x370) @@ -2093,6 +2123,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_POWER_MMCRS (0x37E) #define SPR_WORT (0x37F) #define SPR_PPR (0x380) +#define SPR_PPR32 (0x382) #define SPR_750_GQR0 (0x390) #define SPR_440_DNV0 (0x390) #define SPR_750_GQR1 (0x391) @@ -2116,6 +2147,7 @@ void ppc_compat_add_property(Object *obj, const char *name, #define SPR_440_IVLIM (0x399) #define SPR_TSCR (0x399) #define SPR_750_DMAU (0x39A) +#define SPR_POWER_TTR (0x39A) #define SPR_750_DMAL (0x39B) #define SPR_440_RSTCFG (0x39B) #define SPR_BOOKE_DCDBTRL (0x39C) @@ -2297,6 +2329,8 @@ void ppc_compat_add_property(Object *obj, const char *name, #define HID0_NAP (1 << 22) /* pre-2.06 */ #define HID0_HILE PPC_BIT(19) /* POWER8 */ #define HID0_POWER9_HILE PPC_BIT(4) +#define 
HID0_ENABLE_ATTN PPC_BIT(31) /* POWER8 */ +#define HID0_POWER9_ENABLE_ATTN PPC_BIT(3) /*****************************************************************************/ /* PowerPC Instructions types definitions */ @@ -2858,6 +2892,10 @@ static inline void booke206_fixed_size_tlbn(CPUPPCState *env, const int tlbn, tlb->mas1 |= ((uint32_t)tsize) << MAS1_TSIZE_SHIFT; } +static inline bool ppc_is_split_tlb(PowerPCCPU *cpu) +{ + return cpu->env.tlb_type == TLB_6XX; +} #endif static inline bool msr_is_64bit(CPUPPCState *env, target_ulong msr) @@ -3002,6 +3040,12 @@ static inline int check_pow_nocheck(CPUPPCState *env) return 1; } +/* attn enable check */ +static inline int check_attn_none(CPUPPCState *env) +{ + return 0; +} + /*****************************************************************************/ /* PowerPC implementations definitions */ diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 6d82f24c87..01e358a4a5 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -246,7 +246,7 @@ static void register_amr_sprs(CPUPPCState *env) spr_register_hv(env, SPR_AMOR, "AMOR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_core_lpar_write_generic, 0); #endif /* !CONFIG_USER_ONLY */ } @@ -792,7 +792,7 @@ static void register_BookE_sprs(CPUPPCState *env, uint64_t ivor_mask) 0x00000000); spr_register(env, SPR_BOOKE_DECAR, "DECAR", SPR_NOACCESS, SPR_NOACCESS, - SPR_NOACCESS, &spr_write_generic, + SPR_NOACCESS, &spr_write_generic32, 0x00000000); /* SPRGs */ spr_register(env, SPR_USPRG0, "USPRG0", @@ -2107,19 +2107,42 @@ static int check_pow_hid0_74xx(CPUPPCState *env) return 0; } -static void init_proc_405(CPUPPCState *env) +#if defined(TARGET_PPC64) +static int check_attn_hid0(CPUPPCState *env) { - register_40x_sprs(env); - register_405_sprs(env); - register_usprgh_sprs(env); + if (env->spr[SPR_HID0] & HID0_ENABLE_ATTN) { + return 1; + } - /* Memory management */ -#if !defined(CONFIG_USER_ONLY) + return 0; +} + +static int check_attn_hid0_power9(CPUPPCState *env) +{ + if (env->spr[SPR_HID0] & HID0_POWER9_ENABLE_ATTN) { + return 1; + } + + return 0; +} +#endif + +static void init_tlbs_emb(CPUPPCState *env) +{ +#ifndef CONFIG_USER_ONLY env->nb_tlb = 64; env->nb_ways = 1; - env->id_tlbs = 0; env->tlb_type = TLB_EMB; #endif +} + +static void init_proc_405(CPUPPCState *env) +{ + register_40x_sprs(env); + register_405_sprs(env); + register_usprgh_sprs(env); + + init_tlbs_emb(env); init_excp_4xx(env); env->dcache_line_size = 32; env->icache_line_size = 32; @@ -2138,6 +2161,7 @@ POWERPC_FAMILY(405)(ObjectClass *oc, void *data) dc->desc = "PowerPC 405"; pcc->init_proc = init_proc_405; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_DCR | PPC_WRTEE | PPC_CACHE | PPC_CACHE_ICBI | PPC_40x_ICBT | @@ -2186,13 +2210,8 @@ static void init_proc_440EP(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); - /* Memory management */ -#if !defined(CONFIG_USER_ONLY) - env->nb_tlb = 64; - env->nb_ways = 1; - env->id_tlbs = 0; - env->tlb_type = TLB_EMB; -#endif + + init_tlbs_emb(env); init_excp_BookE(env); env->dcache_line_size = 32; env->icache_line_size = 32; @@ -2210,6 +2229,7 @@ POWERPC_FAMILY(440EP)(ObjectClass *oc, void *data) dc->desc = "PowerPC 440 EP"; pcc->init_proc = init_proc_440EP; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE 
| PPC_STRING | PPC_FLOAT | PPC_FLOAT_FRES | PPC_FLOAT_FSEL | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -2248,6 +2268,7 @@ POWERPC_FAMILY(460EX)(ObjectClass *oc, void *data) dc->desc = "PowerPC 460 EX"; pcc->init_proc = init_proc_440EP; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_FLOAT | PPC_FLOAT_FRES | PPC_FLOAT_FSEL | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -2284,13 +2305,7 @@ static void init_proc_440GP(CPUPPCState *env) register_440_sprs(env); register_usprgh_sprs(env); - /* Memory management */ -#if !defined(CONFIG_USER_ONLY) - env->nb_tlb = 64; - env->nb_ways = 1; - env->id_tlbs = 0; - env->tlb_type = TLB_EMB; -#endif + init_tlbs_emb(env); init_excp_BookE(env); env->dcache_line_size = 32; env->icache_line_size = 32; @@ -2308,6 +2323,7 @@ POWERPC_FAMILY(440GP)(ObjectClass *oc, void *data) dc->desc = "PowerPC 440 GP"; pcc->init_proc = init_proc_440GP; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_DCR | PPC_DCRX | PPC_WRTEE | PPC_MFAPIDI | PPC_CACHE | PPC_CACHE_ICBI | @@ -2358,13 +2374,8 @@ static void init_proc_440x5(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); - /* Memory management */ -#if !defined(CONFIG_USER_ONLY) - env->nb_tlb = 64; - env->nb_ways = 1; - env->id_tlbs = 0; - env->tlb_type = TLB_EMB; -#endif + + init_tlbs_emb(env); init_excp_BookE(env); env->dcache_line_size = 32; env->icache_line_size = 32; @@ -2382,6 +2393,7 @@ POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data) dc->desc = "PowerPC 440x5"; pcc->init_proc = init_proc_440x5; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_DCR | PPC_WRTEE | PPC_RFMCI | PPC_CACHE | PPC_CACHE_ICBI | @@ -2417,6 +2429,7 @@ POWERPC_FAMILY(440x5wDFPU)(ObjectClass *oc, void *data) dc->desc = "PowerPC 440x5 with double precision FPU"; pcc->init_proc = init_proc_440x5; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_FLOAT | PPC_FLOAT_FSQRT | PPC_FLOAT_STFIWX | @@ -2465,6 +2478,7 @@ POWERPC_FAMILY(MPC5xx)(ObjectClass *oc, void *data) dc->desc = "Freescale 5xx cores (aka RCPU)"; pcc->init_proc = init_proc_MPC5xx; pcc->check_pow = check_pow_none; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MEM_EIEIO | PPC_MEM_SYNC | PPC_CACHE_ICBI | PPC_FLOAT | PPC_FLOAT_STFIWX | @@ -2507,6 +2521,7 @@ POWERPC_FAMILY(MPC8xx)(ObjectClass *oc, void *data) dc->desc = "Freescale 8xx cores (aka PowerQUICC)"; pcc->init_proc = init_proc_MPC8xx; pcc->check_pow = check_pow_none; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MEM_EIEIO | PPC_MEM_SYNC | PPC_CACHE_ICBI | PPC_MFTB; @@ -2557,6 +2572,7 @@ POWERPC_FAMILY(G2)(ObjectClass *oc, void *data) dc->desc = "PowerPC G2"; pcc->init_proc = init_proc_G2; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_STFIWX | @@ -2595,6 +2611,7 @@ POWERPC_FAMILY(G2LE)(ObjectClass *oc, void *data) dc->desc = "PowerPC G2LE"; pcc->init_proc = init_proc_G2; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_STFIWX | @@ 
-2721,12 +2738,8 @@ static void init_proc_e200(CPUPPCState *env) SPR_NOACCESS, SPR_NOACCESS, &spr_read_generic, &spr_write_generic, 0x00000000); -#if !defined(CONFIG_USER_ONLY) - env->nb_tlb = 64; - env->nb_ways = 1; - env->id_tlbs = 0; - env->tlb_type = TLB_EMB; -#endif + + init_tlbs_emb(env); init_excp_e200(env, 0xFFFF0000UL); env->dcache_line_size = 32; env->icache_line_size = 32; @@ -2741,6 +2754,7 @@ POWERPC_FAMILY(e200)(ObjectClass *oc, void *data) dc->desc = "e200 core"; pcc->init_proc = init_proc_e200; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; /* * XXX: unimplemented instructions: * dcblc @@ -2843,7 +2857,6 @@ static void init_proc_e500(CPUPPCState *env, int version) /* Memory management */ env->nb_pids = 3; env->nb_ways = 2; - env->id_tlbs = 0; switch (version) { case fsl_e500v1: tlbncfg[0] = register_tlbncfg(2, 1, 1, 0, 256); @@ -3029,6 +3042,7 @@ POWERPC_FAMILY(e500v1)(ObjectClass *oc, void *data) dc->desc = "e500v1 core"; pcc->init_proc = init_proc_e500v1; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_SPE | PPC_SPE_SINGLE | PPC_WRTEE | PPC_RFDI | @@ -3072,6 +3086,7 @@ POWERPC_FAMILY(e500v2)(ObjectClass *oc, void *data) dc->desc = "e500v2 core"; pcc->init_proc = init_proc_e500v2; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_SPE | PPC_SPE_SINGLE | PPC_SPE_DOUBLE | PPC_WRTEE | PPC_RFDI | @@ -3115,6 +3130,7 @@ POWERPC_FAMILY(e500mc)(ObjectClass *oc, void *data) dc->desc = "e500mc core"; pcc->init_proc = init_proc_e500mc; pcc->check_pow = check_pow_none; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_MFTB | PPC_WRTEE | PPC_RFDI | PPC_RFMCI | PPC_CACHE | PPC_CACHE_LOCK | PPC_CACHE_ICBI | @@ -3161,6 +3177,7 @@ POWERPC_FAMILY(e5500)(ObjectClass *oc, void *data) dc->desc = "e5500 core"; pcc->init_proc = init_proc_e5500; pcc->check_pow = check_pow_none; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_MFTB | PPC_WRTEE | PPC_RFDI | PPC_RFMCI | PPC_CACHE | PPC_CACHE_LOCK | PPC_CACHE_ICBI | @@ -3209,6 +3226,7 @@ POWERPC_FAMILY(e6500)(ObjectClass *oc, void *data) dc->desc = "e6500 core"; pcc->init_proc = init_proc_e6500; pcc->check_pow = check_pow_none; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_MFTB | PPC_WRTEE | PPC_RFDI | PPC_RFMCI | PPC_CACHE | PPC_CACHE_LOCK | PPC_CACHE_ICBI | @@ -3271,6 +3289,7 @@ POWERPC_FAMILY(603)(ObjectClass *oc, void *data) dc->desc = "PowerPC 603"; pcc->init_proc = init_proc_603; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3310,6 +3329,7 @@ POWERPC_FAMILY(603E)(ObjectClass *oc, void *data) dc->desc = "PowerPC 603e"; pcc->init_proc = init_proc_603; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3355,6 +3375,7 @@ POWERPC_FAMILY(e300)(ObjectClass *oc, void *data) dc->desc = "e300 core"; pcc->init_proc = init_proc_e300; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_STFIWX | @@ -3410,6 
+3431,7 @@ POWERPC_FAMILY(604)(ObjectClass *oc, void *data) dc->desc = "PowerPC 604"; pcc->init_proc = init_proc_604; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3455,6 +3477,7 @@ POWERPC_FAMILY(604E)(ObjectClass *oc, void *data) dc->desc = "PowerPC 604E"; pcc->init_proc = init_proc_604E; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3511,6 +3534,7 @@ POWERPC_FAMILY(740)(ObjectClass *oc, void *data) dc->desc = "PowerPC 740"; pcc->init_proc = init_proc_740; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3576,6 +3600,7 @@ POWERPC_FAMILY(750)(ObjectClass *oc, void *data) dc->desc = "PowerPC 750"; pcc->init_proc = init_proc_750; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3722,6 +3747,7 @@ POWERPC_FAMILY(750cl)(ObjectClass *oc, void *data) dc->desc = "PowerPC 750 CL"; pcc->init_proc = init_proc_750cl; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; /* * XXX: not implemented: * cache lock instructions: @@ -3829,6 +3855,7 @@ POWERPC_FAMILY(750cx)(ObjectClass *oc, void *data) dc->desc = "PowerPC 750CX"; pcc->init_proc = init_proc_750cx; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3901,6 +3928,7 @@ POWERPC_FAMILY(750fx)(ObjectClass *oc, void *data) dc->desc = "PowerPC 750FX"; pcc->init_proc = init_proc_750fx; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -3973,6 +4001,7 @@ POWERPC_FAMILY(750gx)(ObjectClass *oc, void *data) dc->desc = "PowerPC 750GX"; pcc->init_proc = init_proc_750gx; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -4032,6 +4061,7 @@ POWERPC_FAMILY(745)(ObjectClass *oc, void *data) dc->desc = "PowerPC 745"; pcc->init_proc = init_proc_745; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -4077,6 +4107,7 @@ POWERPC_FAMILY(755)(ObjectClass *oc, void *data) dc->desc = "PowerPC 755"; pcc->init_proc = init_proc_755; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FRSQRTE | PPC_FLOAT_STFIWX | @@ -4143,6 +4174,7 @@ POWERPC_FAMILY(7400)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7400 (aka G4)"; pcc->init_proc = init_proc_7400; pcc->check_pow = check_pow_hid0; + 
pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4222,6 +4254,7 @@ POWERPC_FAMILY(7410)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7410 (aka G4)"; pcc->init_proc = init_proc_7410; pcc->check_pow = check_pow_hid0; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4322,6 +4355,7 @@ POWERPC_FAMILY(7440)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7440 (aka G4)"; pcc->init_proc = init_proc_7440; pcc->check_pow = check_pow_hid0_74xx; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4444,6 +4478,7 @@ POWERPC_FAMILY(7450)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7450 (aka G4)"; pcc->init_proc = init_proc_7450; pcc->check_pow = check_pow_hid0_74xx; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4573,6 +4608,7 @@ POWERPC_FAMILY(7445)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7445 (aka G4)"; pcc->init_proc = init_proc_7445; pcc->check_pow = check_pow_hid0_74xx; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4704,6 +4740,7 @@ POWERPC_FAMILY(7455)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7455 (aka G4)"; pcc->init_proc = init_proc_7455; pcc->check_pow = check_pow_hid0_74xx; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4855,6 +4892,7 @@ POWERPC_FAMILY(7457)(ObjectClass *oc, void *data) dc->desc = "PowerPC 7457 (aka G4)"; pcc->init_proc = init_proc_7457; pcc->check_pow = check_pow_hid0_74xx; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -4989,6 +5027,7 @@ POWERPC_FAMILY(e600)(ObjectClass *oc, void *data) dc->desc = "PowerPC e600"; pcc->init_proc = init_proc_e600; pcc->check_pow = check_pow_hid0_74xx; + pcc->check_attn = check_attn_none; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -5152,7 +5191,7 @@ static void register_book3s_pmu_sup_sprs(CPUPPCState *env) KVM_REG_PPC_MMCR1, 0x00000000); spr_register_kvm(env, SPR_POWER_MMCRA, "MMCRA", SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_write_MMCRA, KVM_REG_PPC_MMCRA, 0x00000000); spr_register_kvm(env, SPR_POWER_PMC1, "PMC1", SPR_NOACCESS, SPR_NOACCESS, @@ -5415,7 +5454,7 @@ static void register_book3s_ids_sprs(CPUPPCState *env) spr_register_hv(env, SPR_MMCRC, "MMCRC", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic32, + &spr_read_generic, &spr_core_write_generic32, 0x00000000); spr_register_hv(env, SPR_MMCRH, "MMCRH", SPR_NOACCESS, SPR_NOACCESS, @@ -5455,7 +5494,7 @@ static void register_book3s_ids_sprs(CPUPPCState *env) spr_register_hv(env, SPR_HRMOR, "HRMOR", SPR_NOACCESS, SPR_NOACCESS, 
SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_core_write_generic, 0x00000000); } @@ -5549,6 +5588,14 @@ static void register_HEIR64_spr(CPUPPCState *env) 0x00000000); } +static void register_power7_common_sprs(CPUPPCState *env) +{ + spr_register(env, SPR_PPR32, "PPR32", + &spr_read_ppr32, &spr_write_ppr32, + &spr_read_ppr32, &spr_write_ppr32, + 0x00000000); +} + static void register_power8_tce_address_control_sprs(CPUPPCState *env) { spr_register_kvm(env, SPR_TAR, "TAR", @@ -5675,7 +5722,7 @@ static void register_power_common_book4_sprs(CPUPPCState *env) spr_register_hv(env, SPR_TSCR, "TSCR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic32, + &spr_read_generic, &spr_core_write_generic32, 0x00000000); spr_register_hv(env, SPR_HMER, "HMER", SPR_NOACCESS, SPR_NOACCESS, @@ -5685,7 +5732,7 @@ static void register_power_common_book4_sprs(CPUPPCState *env) spr_register_hv(env, SPR_HMEER, "HMEER", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_core_write_generic, 0x00000000); spr_register_hv(env, SPR_TFMR, "TFMR", SPR_NOACCESS, SPR_NOACCESS, @@ -5702,6 +5749,26 @@ static void register_power_common_book4_sprs(CPUPPCState *env) &spr_access_nop, &spr_write_generic, &spr_access_nop, &spr_write_generic, 0x00000000); + spr_register_hv(env, SPR_LDBAR, "LDBAR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_core_lpar_write_generic, + 0x00000000); + spr_register_hv(env, SPR_POWER_TTR, "TTR", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_core_write_generic, + 0x00000000); + spr_register_hv(env, SPR_POWER_SPRC, "SPRC", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_generic, &spr_write_sprc, + 0x00000000); + spr_register_hv(env, SPR_POWER_SPRD, "SPRD", + SPR_NOACCESS, SPR_NOACCESS, + SPR_NOACCESS, SPR_NOACCESS, + &spr_read_sprd, &spr_write_sprd, + 0x00000000); #endif } @@ -5761,7 +5828,7 @@ static void register_power8_rpr_sprs(CPUPPCState *env) spr_register_hv(env, SPR_RPR, "RPR", SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, SPR_NOACCESS, - &spr_read_generic, &spr_write_generic, + &spr_read_generic, &spr_core_write_generic, 0x00000103070F1F3F); #endif } @@ -5904,6 +5971,7 @@ POWERPC_FAMILY(970)(ObjectClass *oc, void *data) dc->desc = "PowerPC 970"; pcc->init_proc = init_proc_970; pcc->check_pow = check_pow_970; + pcc->check_attn = check_attn_hid0; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -5979,6 +6047,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data) dc->desc = "POWER5+"; pcc->init_proc = init_proc_power5plus; pcc->check_pow = check_pow_970; + pcc->check_attn = check_attn_hid0; pcc->insns_flags = PPC_INSNS_BASE | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6042,6 +6111,7 @@ static void init_proc_POWER7(CPUPPCState *env) register_power6_common_sprs(env); register_HEIR32_spr(env); register_power6_dbg_sprs(env); + register_power7_common_sprs(env); register_power7_book4_sprs(env); /* env variables */ @@ -6086,6 +6156,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) pcc->pcr_supported = PCR_COMPAT_2_06 | PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER7; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_hid0; pcc->insns_flags = 
PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6142,6 +6213,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data) pcc->l1_icache_size = 0x8000; } +static void bhrb_init_state(CPUPPCState *env, target_long num_entries_log2) +{ + if (env->flags & POWERPC_FLAG_BHRB) { + if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) { + num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2; + } + env->bhrb_num_entries = 1 << num_entries_log2; + env->bhrb_base = (intptr_t)&env->bhrb[0]; + env->bhrb_offset_mask = (env->bhrb_num_entries * sizeof(uint64_t)) - 1; + } +} + +static void bhrb_reset_state(CPUPPCState *env) +{ + if (env->flags & POWERPC_FLAG_BHRB) { + env->bhrb_offset = 0; + env->bhrb_filter = 0; + memset(env->bhrb, 0, sizeof(env->bhrb)); + } +} + +#define POWER8_BHRB_ENTRIES_LOG2 5 static void init_proc_POWER8(CPUPPCState *env) { /* Common Registers */ @@ -6165,6 +6258,7 @@ static void init_proc_POWER8(CPUPPCState *env) register_power6_common_sprs(env); register_HEIR32_spr(env); register_power6_dbg_sprs(env); + register_power7_common_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); register_power8_ebb_sprs(env); @@ -6183,6 +6277,8 @@ static void init_proc_POWER8(CPUPPCState *env) env->dcache_line_size = 128; env->icache_line_size = 128; + bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2); + /* Allocate hardware IRQ controller */ init_excp_POWER8(env); ppcPOWER7_irq_init(env_archcpu(env)); @@ -6223,6 +6319,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data) pcc->pcr_supported = PCR_COMPAT_2_07 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER8; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_hid0; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6307,6 +6404,7 @@ static struct ppc_radix_page_info POWER9_radix_page_info = { }; #endif /* CONFIG_USER_ONLY */ +#define POWER9_BHRB_ENTRIES_LOG2 5 static void init_proc_POWER9(CPUPPCState *env) { /* Common Registers */ @@ -6328,6 +6426,7 @@ static void init_proc_POWER9(CPUPPCState *env) register_power6_common_sprs(env); register_HEIR32_spr(env); register_power6_dbg_sprs(env); + register_power7_common_sprs(env); register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); register_power8_ebb_sprs(env); @@ -6357,6 +6456,8 @@ static void init_proc_POWER9(CPUPPCState *env) env->dcache_line_size = 128; env->icache_line_size = 128; + bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2); + /* Allocate hardware IRQ controller */ init_excp_POWER9(env); ppcPOWER9_irq_init(env_archcpu(env)); @@ -6412,6 +6513,7 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data) PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER9; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_hid0_power9; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6497,6 +6599,7 @@ static struct ppc_radix_page_info POWER10_radix_page_info = { }; #endif /* !CONFIG_USER_ONLY */ +#define POWER10_BHRB_ENTRIES_LOG2 5 static void init_proc_POWER10(CPUPPCState *env) { /* Common Registers */ @@ -6518,6 +6621,7 @@ static void init_proc_POWER10(CPUPPCState *env) register_power6_common_sprs(env); register_HEIR64_spr(env); register_power6_dbg_sprs(env); + register_power7_common_sprs(env); 
register_power8_tce_address_control_sprs(env); register_power8_ids_sprs(env); register_power8_ebb_sprs(env); @@ -6546,6 +6650,8 @@ static void init_proc_POWER10(CPUPPCState *env) env->dcache_line_size = 128; env->icache_line_size = 128; + bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2); + /* Allocate hardware IRQ controller */ init_excp_POWER10(env); ppcPOWER9_irq_init(env_archcpu(env)); @@ -6588,6 +6694,7 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) PCR_COMPAT_2_06 | PCR_COMPAT_2_05; pcc->init_proc = init_proc_POWER10; pcc->check_pow = check_pow_nocheck; + pcc->check_attn = check_attn_hid0_power9; pcc->insns_flags = PPC_INSNS_BASE | PPC_ISEL | PPC_STRING | PPC_MFTB | PPC_FLOAT | PPC_FLOAT_FSEL | PPC_FLOAT_FRES | PPC_FLOAT_FSQRT | PPC_FLOAT_FRSQRTE | @@ -6650,7 +6757,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data) pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE | POWERPC_FLAG_BE | POWERPC_FLAG_PMM | POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR | - POWERPC_FLAG_VSX | POWERPC_FLAG_SCV; + POWERPC_FLAG_VSX | POWERPC_FLAG_SCV | + POWERPC_FLAG_BHRB; pcc->l1_dcache_size = 0x8000; pcc->l1_icache_size = 0x8000; } @@ -6661,6 +6769,7 @@ void cpu_ppc_set_vhyp(PowerPCCPU *cpu, PPCVirtualHypervisor *vhyp) CPUPPCState *env = &cpu->env; cpu->vhyp = vhyp; + cpu->vhyp_class = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(vhyp); /* * With a virtual hypervisor mode we never allow the CPU to go @@ -6800,20 +6909,17 @@ static void init_ppc_proc(PowerPCCPU *cpu) } /* Allocate TLBs buffer when needed */ #if !defined(CONFIG_USER_ONLY) - if (env->nb_tlb != 0) { - int nb_tlb = env->nb_tlb; - if (env->id_tlbs != 0) { - nb_tlb *= 2; - } + if (env->nb_tlb) { switch (env->tlb_type) { case TLB_6XX: - env->tlb.tlb6 = g_new0(ppc6xx_tlb_t, nb_tlb); + /* 6xx has separate TLBs for instructions and data hence times 2 */ + env->tlb.tlb6 = g_new0(ppc6xx_tlb_t, 2 * env->nb_tlb); break; case TLB_EMB: - env->tlb.tlbe = g_new0(ppcemb_tlb_t, nb_tlb); + env->tlb.tlbe = g_new0(ppcemb_tlb_t, env->nb_tlb); break; case TLB_MAS: - env->tlb.tlbm = g_new0(ppcmas_tlb_t, nb_tlb); + env->tlb.tlbm = g_new0(ppcmas_tlb_t, env->nb_tlb); break; } /* Pre-compute some useful values */ @@ -6824,6 +6930,11 @@ static void init_ppc_proc(PowerPCCPU *cpu) warn_report("no power management check handler registered." " Attempt QEMU to crash very soon !"); } + + if (env->check_attn == NULL) { + warn_report("no attn check handler registered." 
+ " Attempt QEMU to crash very soon !"); + } } @@ -7063,7 +7174,7 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) } name = cpu_model_from_type(typename); - qemu_printf("PowerPC %-16s PVR %08x\n", name, pcc->pvr); + qemu_printf(" %-16s PVR %08x\n", name, pcc->pvr); for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) { PowerPCCPUAlias *alias = &ppc_cpu_aliases[i]; ObjectClass *alias_oc = ppc_cpu_class_by_name(alias->model); @@ -7076,10 +7187,10 @@ static void ppc_cpu_list_entry(gpointer data, gpointer user_data) * avoid printing the wrong alias here and use "preferred" instead */ if (strcmp(alias->alias, family->desc) == 0) { - qemu_printf("PowerPC %-16s (alias for preferred %s CPU)\n", + qemu_printf(" %-16s (alias for preferred %s CPU)\n", alias->alias, family->desc); } else { - qemu_printf("PowerPC %-16s (alias for %s)\n", + qemu_printf(" %-16s (alias for %s)\n", alias->alias, name); } } @@ -7090,6 +7201,7 @@ void ppc_cpu_list(void) { GSList *list; + qemu_printf("Available CPUs:\n"); list = object_class_get_list(TYPE_POWERPC_CPU, false); list = g_slist_sort(list, ppc_cpu_list_compare); g_slist_foreach(list, ppc_cpu_list_entry, NULL); @@ -7097,7 +7209,7 @@ void ppc_cpu_list(void) #ifdef CONFIG_KVM qemu_printf("\n"); - qemu_printf("PowerPC %s\n", "host"); + qemu_printf(" %s\n", "host"); #endif } @@ -7194,7 +7306,7 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) if (env->mmu_model != POWERPC_MMU_REAL) { ppc_tlb_invalidate_all(env); } - pmu_mmcr01_updated(env); + pmu_mmcr01a_updated(env); } /* clean any pending stop state */ @@ -7220,6 +7332,10 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) } env->spr[i] = spr->default_value; } + +#if defined(TARGET_PPC64) + bhrb_reset_state(env); +#endif } #ifndef CONFIG_USER_ONLY @@ -7247,9 +7363,7 @@ static void ppc_cpu_exec_enter(CPUState *cs) PowerPCCPU *cpu = POWERPC_CPU(cs); if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->cpu_exec_enter(cpu->vhyp, cpu); + cpu->vhyp_class->cpu_exec_enter(cpu->vhyp, cpu); } } @@ -7258,9 +7372,7 @@ static void ppc_cpu_exec_exit(CPUState *cs) PowerPCCPU *cpu = POWERPC_CPU(cs); if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->cpu_exec_exit(cpu->vhyp, cpu); + cpu->vhyp_class->cpu_exec_exit(cpu->vhyp, cpu); } } #endif /* CONFIG_TCG */ @@ -7284,6 +7396,7 @@ static void ppc_cpu_instance_init(Object *obj) env->flags = pcc->flags; env->bfd_mach = pcc->bfd_mach; env->check_pow = pcc->check_pow; + env->check_attn = pcc->check_attn; /* * Mark HV mode as supported if the CPU has an MSR_HV bit in the @@ -7408,6 +7521,11 @@ static void ppc_cpu_class_init(ObjectClass *oc, void *data) #ifndef CONFIG_USER_ONLY cc->sysemu_ops = &ppc_sysemu_ops; INTERRUPT_STATS_PROVIDER_CLASS(oc)->get_statistics = ppc_get_irq_stats; + + /* check_prot_access_type relies on MMU access and PAGE bits relations */ + qemu_build_assert(MMU_DATA_LOAD == 0 && MMU_DATA_STORE == 1 && + MMU_INST_FETCH == 2 && PAGE_READ == 1 && + PAGE_WRITE == 2 && PAGE_EXEC == 4); #endif cc->gdb_num_core_regs = 71; diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 674c05a2ce..0cd542675f 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -19,6 +19,8 @@ #include "qemu/osdep.h" #include "qemu/main-loop.h" #include "qemu/log.h" +#include "sysemu/sysemu.h" +#include "sysemu/runstate.h" #include "cpu.h" #include "exec/exec-all.h" #include "internal.h" @@ -142,7 +144,7 @@ static inline 
bool insn_need_byteswap(CPUArchState *env) return !!(env->msr & ((target_ulong)1 << MSR_LE)); } -static uint32_t ppc_ldl_code(CPUArchState *env, abi_ptr addr) +static uint32_t ppc_ldl_code(CPUArchState *env, target_ulong addr) { uint32_t insn = cpu_ldl_code(env, addr); @@ -152,6 +154,7 @@ static uint32_t ppc_ldl_code(CPUArchState *env, abi_ptr addr) return insn; } + #endif static void ppc_excp_debug_sw_tlb(CPUPPCState *env, int excp) @@ -423,23 +426,57 @@ static void powerpc_set_excp_state(PowerPCCPU *cpu, target_ulong vector, env->reserve_addr = -1; } -static void powerpc_mcheck_checkstop(CPUPPCState *env) +#ifdef CONFIG_TCG +/* + * This stops the machine and logs CPU state without killing QEMU (like + * cpu_abort()) because it is often a guest error as opposed to a QEMU error, + * so the machine can still be debugged. + */ +static G_NORETURN void powerpc_checkstop(CPUPPCState *env, const char *reason) { CPUState *cs = env_cpu(env); + FILE *f; + + f = qemu_log_trylock(); + if (f) { + fprintf(f, "Entering checkstop state: %s\n", reason); + cpu_dump_state(cs, f, CPU_DUMP_FPU | CPU_DUMP_CCOP); + qemu_log_unlock(f); + } + + /* + * This stops the machine and logs CPU state without killing QEMU + * (like cpu_abort()) so the machine can still be debugged (because + * it is often a guest error). + */ + qemu_system_guest_panicked(NULL); + cpu_loop_exit_noexc(cs); +} + +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) +void helper_attn(CPUPPCState *env) +{ + /* POWER attn is unprivileged when enabled by HID, otherwise illegal */ + if ((*env->check_attn)(env)) { + powerpc_checkstop(env, "host executed attn"); + } else { + raise_exception_err(env, POWERPC_EXCP_HV_EMU, + POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL); + } +} +#endif +#endif /* CONFIG_TCG */ +static void powerpc_mcheck_checkstop(CPUPPCState *env) +{ + /* KVM guests always have MSR[ME] enabled */ +#ifdef CONFIG_TCG if (FIELD_EX64(env->msr, MSR, ME)) { return; } - /* Machine check exception is not enabled. Enter checkstop state. */ - fprintf(stderr, "Machine check while not allowed. " - "Entering checkstop state\n"); - if (qemu_log_separate()) { - qemu_log("Machine check while not allowed. " - "Entering checkstop state\n"); - } - cs->halted = 1; - cpu_interrupt_exittb(cs); + powerpc_checkstop(env, "machine check with MSR[ME]=0"); +#endif } static void powerpc_excp_40x(PowerPCCPU *cpu, int excp) @@ -794,9 +831,7 @@ static void powerpc_excp_7xx(PowerPCCPU *cpu, int excp) * HV mode, we need to keep hypercall support. */ if (lev == 1 && cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hypercall(cpu->vhyp, cpu); + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); powerpc_reset_excp_state(cpu); return; } @@ -946,9 +981,7 @@ static void powerpc_excp_74xx(PowerPCCPU *cpu, int excp) * HV mode, we need to keep hypercall support. 
*/ if (lev == 1 && cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hypercall(cpu->vhyp, cpu); + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); powerpc_reset_excp_state(cpu); return; } @@ -1437,9 +1470,7 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) /* "PAPR mode" built-in hypercall emulation */ if (lev == 1 && books_vhyp_handles_hcall(cpu)) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hypercall(cpu->vhyp, cpu); + cpu->vhyp_class->hypercall(cpu->vhyp, cpu); powerpc_reset_excp_state(cpu); return; } @@ -1574,10 +1605,8 @@ static void powerpc_excp_books(PowerPCCPU *cpu, int excp) } if ((new_msr & MSR_HVB) && books_vhyp_handles_hv_excp(cpu)) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); /* Deliver interrupt to L1 by returning from the H_ENTER_NESTED call */ - vhc->deliver_hv_excp(cpu, excp); + cpu->vhyp_class->deliver_hv_excp(cpu, excp); powerpc_reset_excp_state(cpu); } else { /* Sanity check */ @@ -2750,7 +2779,7 @@ void helper_rfmci(CPUPPCState *env) } #endif /* !CONFIG_USER_ONLY */ -void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TW(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int32_t)arg1 < (int32_t)arg2 && (flags & 0x10)) || @@ -2764,7 +2793,7 @@ void helper_tw(CPUPPCState *env, target_ulong arg1, target_ulong arg2, } #ifdef TARGET_PPC64 -void helper_td(CPUPPCState *env, target_ulong arg1, target_ulong arg2, +void helper_TD(CPUPPCState *env, target_ulong arg1, target_ulong arg2, uint32_t flags) { if (!likely(!(((int64_t)arg1 < (int64_t)arg2 && (flags & 0x10)) || @@ -2940,7 +2969,7 @@ void helper_msgsnd(target_ulong rb) PowerPCCPU *cpu = POWERPC_CPU(cs); CPUPPCState *cenv = &cpu->env; - if ((rb & DBELL_BRDCAST) || (cenv->spr[SPR_BOOKE_PIR] == pir)) { + if ((rb & DBELL_BRDCAST_MASK) || (cenv->spr[SPR_BOOKE_PIR] == pir)) { ppc_set_irq(cpu, irq, 1); } } @@ -2959,6 +2988,16 @@ static bool dbell_type_server(target_ulong rb) return (rb & DBELL_TYPE_MASK) == DBELL_TYPE_DBELL_SERVER; } +static inline bool dbell_bcast_core(target_ulong rb) +{ + return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_CORE; +} + +static inline bool dbell_bcast_subproc(target_ulong rb) +{ + return (rb & DBELL_BRDCAST_MASK) == DBELL_BRDCAST_SUBPROC; +} + void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) { if (!dbell_type_server(rb)) { @@ -2968,32 +3007,43 @@ void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) ppc_set_irq(env_archcpu(env), PPC_INTERRUPT_HDOORBELL, 0); } -static void book3s_msgsnd_common(int pir, int irq) +void helper_book3s_msgsnd(CPUPPCState *env, target_ulong rb) { - CPUState *cs; + int pir = rb & DBELL_PROCIDTAG_MASK; + bool brdcast = false; + CPUState *cs, *ccs; + PowerPCCPU *cpu; - bql_lock(); - CPU_FOREACH(cs) { - PowerPCCPU *cpu = POWERPC_CPU(cs); - CPUPPCState *cenv = &cpu->env; + if (!dbell_type_server(rb)) { + return; + } - /* TODO: broadcast message to all threads of the same processor */ - if (cenv->spr_cb[SPR_PIR].default_value == pir) { - ppc_set_irq(cpu, irq, 1); - } + cpu = ppc_get_vcpu_by_pir(pir); + if (!cpu) { + return; } - bql_unlock(); -} + cs = CPU(cpu); -void helper_book3s_msgsnd(target_ulong rb) -{ - int pir = rb & DBELL_PROCIDTAG_MASK; + if (dbell_bcast_core(rb) || (dbell_bcast_subproc(rb) && + (env->flags & POWERPC_FLAG_SMT_1LPAR))) { + brdcast = true; + } - if (!dbell_type_server(rb)) { + if (cs->nr_threads == 1 || !brdcast) { + 
ppc_set_irq(cpu, PPC_INTERRUPT_HDOORBELL, 1); return; } - book3s_msgsnd_common(pir, PPC_INTERRUPT_HDOORBELL); + /* + * Why is bql needed for walking CPU list? Answer seems to be because ppc + * irq handling needs it, but ppc_set_irq takes the lock itself if needed, + * so could this be removed? + */ + bql_lock(); + THREAD_SIBLING_FOREACH(cs, ccs) { + ppc_set_irq(POWERPC_CPU(ccs), PPC_INTERRUPT_HDOORBELL, 1); + } + bql_unlock(); } #ifdef TARGET_PPC64 diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c index 4b3dcad5d1..51bce99fd5 100644 --- a/target/ppc/fpu_helper.c +++ b/target/ppc/fpu_helper.c @@ -490,54 +490,12 @@ static void float_invalid_op_addsub(CPUPPCState *env, int flags, } } -/* fadd - fadd. */ -float64 helper_fadd(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void addsub_flags_handler(CPUPPCState *env, int flags, + uintptr_t ra) { - float64 ret = float64_add(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_addsub(env, flags, 1, GETPC()); - } - - return ret; -} - -/* fadds - fadds. */ -float64 helper_fadds(CPUPPCState *env, float64 arg1, float64 arg2) -{ - float64 ret = float64r32_add(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_addsub(env, flags, 1, GETPC()); - } - return ret; -} - -/* fsub - fsub. */ -float64 helper_fsub(CPUPPCState *env, float64 arg1, float64 arg2) -{ - float64 ret = float64_sub(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_addsub(env, flags, 1, GETPC()); + float_invalid_op_addsub(env, flags, 1, ra); } - - return ret; -} - -/* fsubs - fsubs. */ -float64 helper_fsubs(CPUPPCState *env, float64 arg1, float64 arg2) -{ - float64 ret = float64r32_sub(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_addsub(env, flags, 1, GETPC()); - } - return ret; } static void float_invalid_op_mul(CPUPPCState *env, int flags, @@ -550,29 +508,11 @@ static void float_invalid_op_mul(CPUPPCState *env, int flags, } } -/* fmul - fmul. */ -float64 helper_fmul(CPUPPCState *env, float64 arg1, float64 arg2) -{ - float64 ret = float64_mul(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_mul(env, flags, 1, GETPC()); - } - - return ret; -} - -/* fmuls - fmuls. */ -float64 helper_fmuls(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void mul_flags_handler(CPUPPCState *env, int flags, uintptr_t ra) { - float64 ret = float64r32_mul(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_mul(env, flags, 1, GETPC()); + float_invalid_op_mul(env, flags, 1, ra); } - return ret; } static void float_invalid_op_div(CPUPPCState *env, int flags, @@ -587,36 +527,14 @@ static void float_invalid_op_div(CPUPPCState *env, int flags, } } -/* fdiv - fdiv. 
*/ -float64 helper_fdiv(CPUPPCState *env, float64 arg1, float64 arg2) -{ - float64 ret = float64_div(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_div(env, flags, 1, GETPC()); - } - if (unlikely(flags & float_flag_divbyzero)) { - float_zero_divide_excp(env, GETPC()); - } - - return ret; -} - -/* fdivs - fdivs. */ -float64 helper_fdivs(CPUPPCState *env, float64 arg1, float64 arg2) +static inline void div_flags_handler(CPUPPCState *env, int flags, uintptr_t ra) { - float64 ret = float64r32_div(arg1, arg2, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_div(env, flags, 1, GETPC()); + float_invalid_op_div(env, flags, 1, ra); } if (unlikely(flags & float_flag_divbyzero)) { - float_zero_divide_excp(env, GETPC()); + float_zero_divide_excp(env, ra); } - - return ret; } static uint64_t float_invalid_cvt(CPUPPCState *env, int flags, @@ -755,7 +673,7 @@ static uint64_t do_fmadds(CPUPPCState *env, float64 a, float64 b, uint64_t helper_##op(CPUPPCState *env, uint64_t arg1, \ uint64_t arg2, uint64_t arg3) \ { return do_fmadd(env, arg1, arg2, arg3, madd_flags, GETPC()); } \ - uint64_t helper_##op##s(CPUPPCState *env, uint64_t arg1, \ + uint64_t helper_##op##S(CPUPPCState *env, uint64_t arg1, \ uint64_t arg2, uint64_t arg3) \ { return do_fmadds(env, arg1, arg2, arg3, madd_flags, GETPC()); } @@ -764,10 +682,10 @@ static uint64_t do_fmadds(CPUPPCState *env, float64 a, float64 b, #define NMADD_FLGS float_muladd_negate_result #define NMSUB_FLGS (float_muladd_negate_c | float_muladd_negate_result) -FPU_FMADD(fmadd, MADD_FLGS) -FPU_FMADD(fnmadd, NMADD_FLGS) -FPU_FMADD(fmsub, MSUB_FLGS) -FPU_FMADD(fnmsub, NMSUB_FLGS) +FPU_FMADD(FMADD, MADD_FLGS) +FPU_FMADD(FNMADD, NMADD_FLGS) +FPU_FMADD(FMSUB, MSUB_FLGS) +FPU_FMADD(FNMSUB, NMSUB_FLGS) /* frsp - frsp. */ static uint64_t do_frsp(CPUPPCState *env, uint64_t arg, uintptr_t retaddr) @@ -812,81 +730,66 @@ float64 helper_##name(CPUPPCState *env, float64 arg) \ FPU_FSQRT(FSQRT, float64_sqrt) FPU_FSQRT(FSQRTS, float64r32_sqrt) -/* fre - fre. */ -float64 helper_fre(CPUPPCState *env, float64 arg) -{ - /* "Estimate" the reciprocal with actual division. */ - float64 ret = float64_div(float64_one, arg, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid_snan)) { - float_invalid_op_vxsnan(env, GETPC()); - } - if (unlikely(flags & float_flag_divbyzero)) { - float_zero_divide_excp(env, GETPC()); - /* For FPSCR.ZE == 0, the result is 1/2. */ - ret = float64_set_sign(float64_half, float64_is_neg(arg)); - } - - return ret; +#define FPU_FRE(name, op) \ +float64 helper_##name(CPUPPCState *env, float64 arg) \ +{ \ + /* "Estimate" the reciprocal with actual division. */ \ + float64 ret = op(float64_one, arg, &env->fp_status); \ + int flags = get_float_exception_flags(&env->fp_status); \ + \ + if (unlikely(flags & float_flag_invalid_snan)) { \ + float_invalid_op_vxsnan(env, GETPC()); \ + } \ + if (unlikely(flags & float_flag_divbyzero)) { \ + float_zero_divide_excp(env, GETPC()); \ + /* For FPSCR.ZE == 0, the result is 1/2. */ \ + ret = float64_set_sign(float64_half, float64_is_neg(arg)); \ + } \ + \ + return ret; \ } -/* fres - fres. */ -uint64_t helper_fres(CPUPPCState *env, uint64_t arg) -{ - /* "Estimate" the reciprocal with actual division. 
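For readability, the FPU_HELPER macro introduced in this hunk expands, for the FADD / float64_add / addsub_flags_handler instantiation, to the helper below. This is only the macro written out by hand, not additional code:

float64 helper_FADD(CPUPPCState *env, float64 arg1, float64 arg2)
{
    float64 ret = float64_add(arg1, arg2, &env->fp_status);
    int flags = get_float_exception_flags(&env->fp_status);
    uintptr_t ra = GETPC();

    addsub_flags_handler(env, flags, ra);   /* reports float_flag_invalid */
    return ret;
}

The single-precision variants (FADDS, FSUBS, FMULS, FDIVS) differ only in using the float64r32_* softfloat operations.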
*/ - float64 ret = float64r32_div(float64_one, arg, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid_snan)) { - float_invalid_op_vxsnan(env, GETPC()); - } - if (unlikely(flags & float_flag_divbyzero)) { - float_zero_divide_excp(env, GETPC()); - /* For FPSCR.ZE == 0, the result is 1/2. */ - ret = float64_set_sign(float64_half, float64_is_neg(arg)); - } - - return ret; +#define FPU_FRSQRTE(name, op) \ +float64 helper_##name(CPUPPCState *env, float64 arg) \ +{ \ + /* "Estimate" the reciprocal with actual division. */ \ + float64 rets = float64_sqrt(arg, &env->fp_status); \ + float64 retd = op(float64_one, rets, &env->fp_status); \ + int flags = get_float_exception_flags(&env->fp_status); \ + \ + if (unlikely(flags & float_flag_invalid)) { \ + float_invalid_op_sqrt(env, flags, 1, GETPC()); \ + } \ + if (unlikely(flags & float_flag_divbyzero)) { \ + /* Reciprocal of (square root of) zero. */ \ + float_zero_divide_excp(env, GETPC()); \ + } \ + \ + return retd; \ } -/* frsqrte - frsqrte. */ -float64 helper_frsqrte(CPUPPCState *env, float64 arg) -{ - /* "Estimate" the reciprocal with actual division. */ - float64 rets = float64_sqrt(arg, &env->fp_status); - float64 retd = float64_div(float64_one, rets, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_sqrt(env, flags, 1, GETPC()); - } - if (unlikely(flags & float_flag_divbyzero)) { - /* Reciprocal of (square root of) zero. */ - float_zero_divide_excp(env, GETPC()); - } - - return retd; +#define FPU_HELPER(name, op, flags_handler) \ +float64 helper_##name(CPUPPCState *env, float64 arg1, float64 arg2) \ +{ \ + float64 ret = op(arg1, arg2, &env->fp_status); \ + int flags = get_float_exception_flags(&env->fp_status); \ + uintptr_t ra = GETPC(); \ + flags_handler(env, flags, ra); \ + return ret; \ } -/* frsqrtes - frsqrtes. */ -float64 helper_frsqrtes(CPUPPCState *env, float64 arg) -{ - /* "Estimate" the reciprocal with actual division. */ - float64 rets = float64_sqrt(arg, &env->fp_status); - float64 retd = float64r32_div(float64_one, rets, &env->fp_status); - int flags = get_float_exception_flags(&env->fp_status); - - if (unlikely(flags & float_flag_invalid)) { - float_invalid_op_sqrt(env, flags, 1, GETPC()); - } - if (unlikely(flags & float_flag_divbyzero)) { - /* Reciprocal of (square root of) zero. */ - float_zero_divide_excp(env, GETPC()); - } - - return retd; -} +FPU_FRE(FRE, float64_div) +FPU_FRE(FRES, float64r32_div) +FPU_FRSQRTE(FRSQRTE, float64_div) +FPU_FRSQRTE(FRSQRTES, float64r32_div) +FPU_HELPER(FADD, float64_add, addsub_flags_handler) +FPU_HELPER(FADDS, float64r32_add, addsub_flags_handler) +FPU_HELPER(FSUB, float64_sub, addsub_flags_handler) +FPU_HELPER(FSUBS, float64r32_sub, addsub_flags_handler) +FPU_HELPER(FMUL, float64_mul, mul_flags_handler) +FPU_HELPER(FMULS, float64r32_mul, mul_flags_handler) +FPU_HELPER(FDIV, float64_div, div_flags_handler) +FPU_HELPER(FDIVS, float64r32_div, div_flags_handler) /* fsel - fsel. */ uint64_t helper_FSEL(uint64_t a, uint64_t b, uint64_t c) @@ -903,7 +806,7 @@ uint64_t helper_FSEL(uint64_t a, uint64_t b, uint64_t c) } } -uint32_t helper_ftdiv(uint64_t fra, uint64_t frb) +uint32_t helper_FTDIV(uint64_t fra, uint64_t frb) { int fe_flag = 0; int fg_flag = 0; @@ -939,7 +842,7 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb) return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 
2 : 0); } -uint32_t helper_ftsqrt(uint64_t frb) +uint32_t helper_FTSQRT(uint64_t frb) { int fe_flag = 0; int fg_flag = 0; diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 86f97ee1e7..76b8f25c77 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -1,8 +1,8 @@ DEF_HELPER_FLAGS_3(raise_exception_err, TCG_CALL_NO_WG, noreturn, env, i32, i32) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, i32) -DEF_HELPER_FLAGS_4(tw, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TW, TCG_CALL_NO_WG, void, env, tl, tl, i32) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_4(td, TCG_CALL_NO_WG, void, env, tl, tl, i32) +DEF_HELPER_FLAGS_4(TD, TCG_CALL_NO_WG, void, env, tl, tl, i32) #endif DEF_HELPER_4(HASHST, void, env, tl, tl, tl) DEF_HELPER_4(HASHCHK, void, env, tl, tl, tl) @@ -30,6 +30,7 @@ DEF_HELPER_2(store_dawr0, void, env, tl) DEF_HELPER_2(store_dawrx0, void, env, tl) DEF_HELPER_2(store_mmcr0, void, env, tl) DEF_HELPER_2(store_mmcr1, void, env, tl) +DEF_HELPER_2(store_mmcrA, void, env, tl) DEF_HELPER_3(store_pmc, void, env, i32, i64) DEF_HELPER_2(read_pmc, tl, env, i32) DEF_HELPER_2(insns_inc, void, env, i32) @@ -52,14 +53,14 @@ DEF_HELPER_FLAGS_2(icbiep, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32) #if defined(TARGET_PPC64) -DEF_HELPER_4(divdeu, i64, env, i64, i64, i32) -DEF_HELPER_4(divde, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDEU, i64, env, i64, i64, i32) +DEF_HELPER_4(DIVDE, i64, env, i64, i64, i32) #endif -DEF_HELPER_4(divweu, tl, env, tl, tl, i32) -DEF_HELPER_4(divwe, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWEU, tl, env, tl, tl, i32) +DEF_HELPER_4(DIVWE, tl, env, tl, tl, i32) -DEF_HELPER_FLAGS_1(popcntb, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(cmpb, TCG_CALL_NO_RWG_SE, tl, tl, tl) +DEF_HELPER_FLAGS_1(POPCNTB, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(CMPB, TCG_CALL_NO_RWG_SE, tl, tl, tl) DEF_HELPER_3(sraw, tl, env, tl, tl) DEF_HELPER_FLAGS_2(CFUGED, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(PDEPD, TCG_CALL_NO_RWG_SE, i64, i64, i64) @@ -67,12 +68,12 @@ DEF_HELPER_FLAGS_2(PEXTD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_1(CDTBCD, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_FLAGS_1(CBCDTD, TCG_CALL_NO_RWG_SE, tl, tl) #if defined(TARGET_PPC64) -DEF_HELPER_FLAGS_2(cmpeqb, TCG_CALL_NO_RWG_SE, i32, tl, tl) -DEF_HELPER_FLAGS_1(popcntw, TCG_CALL_NO_RWG_SE, tl, tl) -DEF_HELPER_FLAGS_2(bpermd, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(CMPEQB, TCG_CALL_NO_RWG_SE, i32, tl, tl) +DEF_HELPER_FLAGS_1(POPCNTW, TCG_CALL_NO_RWG_SE, tl, tl) +DEF_HELPER_FLAGS_2(BPERMD, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_3(srad, tl, env, tl, tl) -DEF_HELPER_FLAGS_0(darn32, TCG_CALL_NO_RWG, tl) -DEF_HELPER_FLAGS_0(darn64, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN32, TCG_CALL_NO_RWG, tl) +DEF_HELPER_FLAGS_0(DARN64, TCG_CALL_NO_RWG, tl) #endif DEF_HELPER_FLAGS_1(cntlsw32, TCG_CALL_NO_RWG_SE, i32, i32) @@ -110,32 +111,32 @@ DEF_HELPER_2(friz, i64, env, i64) DEF_HELPER_2(frip, i64, env, i64) DEF_HELPER_2(frim, i64, env, i64) -DEF_HELPER_3(fadd, f64, env, f64, f64) -DEF_HELPER_3(fadds, f64, env, f64, f64) -DEF_HELPER_3(fsub, f64, env, f64, f64) -DEF_HELPER_3(fsubs, f64, env, f64, f64) -DEF_HELPER_3(fmul, f64, env, f64, f64) -DEF_HELPER_3(fmuls, f64, env, f64, f64) -DEF_HELPER_3(fdiv, f64, env, f64, f64) -DEF_HELPER_3(fdivs, f64, env, f64, f64) -DEF_HELPER_4(fmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsub, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadd, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsub, 
i64, env, i64, i64, i64) -DEF_HELPER_4(fmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fmsubs, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmadds, i64, env, i64, i64, i64) -DEF_HELPER_4(fnmsubs, i64, env, i64, i64, i64) +DEF_HELPER_3(FADD, f64, env, f64, f64) +DEF_HELPER_3(FADDS, f64, env, f64, f64) +DEF_HELPER_3(FSUB, f64, env, f64, f64) +DEF_HELPER_3(FSUBS, f64, env, f64, f64) +DEF_HELPER_3(FMUL, f64, env, f64, f64) +DEF_HELPER_3(FMULS, f64, env, f64, f64) +DEF_HELPER_3(FDIV, f64, env, f64, f64) +DEF_HELPER_3(FDIVS, f64, env, f64, f64) +DEF_HELPER_4(FMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADD, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUB, i64, env, i64, i64, i64) +DEF_HELPER_4(FMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FMSUBS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMADDS, i64, env, i64, i64, i64) +DEF_HELPER_4(FNMSUBS, i64, env, i64, i64, i64) DEF_HELPER_2(FSQRT, f64, env, f64) DEF_HELPER_2(FSQRTS, f64, env, f64) -DEF_HELPER_2(fre, i64, env, i64) -DEF_HELPER_2(fres, i64, env, i64) -DEF_HELPER_2(frsqrte, i64, env, i64) -DEF_HELPER_2(frsqrtes, i64, env, i64) +DEF_HELPER_2(FRE, i64, env, i64) +DEF_HELPER_2(FRES, i64, env, i64) +DEF_HELPER_2(FRSQRTE, i64, env, i64) +DEF_HELPER_2(FRSQRTES, i64, env, i64) DEF_HELPER_FLAGS_3(FSEL, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) -DEF_HELPER_FLAGS_2(ftdiv, TCG_CALL_NO_RWG_SE, i32, i64, i64) -DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64) +DEF_HELPER_FLAGS_2(FTDIV, TCG_CALL_NO_RWG_SE, i32, i64, i64) +DEF_HELPER_FLAGS_1(FTSQRT, TCG_CALL_NO_RWG_SE, i32, i64) #define dh_alias_avr ptr #define dh_ctype_avr ppc_avr_t * @@ -267,12 +268,12 @@ DEF_HELPER_5(VMSUMSHS, void, env, avr, avr, avr, avr) DEF_HELPER_FLAGS_5(VMLADDUHM, TCG_CALL_NO_RWG, void, avr, avr, avr, avr, i32) DEF_HELPER_FLAGS_2(mtvscr, TCG_CALL_NO_RWG, void, env, i32) DEF_HELPER_FLAGS_1(mfvscr, TCG_CALL_NO_RWG, i32, env) -DEF_HELPER_3(lvebx, void, env, avr, tl) -DEF_HELPER_3(lvehx, void, env, avr, tl) -DEF_HELPER_3(lvewx, void, env, avr, tl) -DEF_HELPER_3(stvebx, void, env, avr, tl) -DEF_HELPER_3(stvehx, void, env, avr, tl) -DEF_HELPER_3(stvewx, void, env, avr, tl) +DEF_HELPER_3(LVEBX, void, env, avr, tl) +DEF_HELPER_3(LVEHX, void, env, avr, tl) +DEF_HELPER_3(LVEWX, void, env, avr, tl) +DEF_HELPER_3(STVEBX, void, env, avr, tl) +DEF_HELPER_3(STVEHX, void, env, avr, tl) +DEF_HELPER_3(STVEWX, void, env, avr, tl) #if defined(TARGET_PPC64) DEF_HELPER_4(lxvl, void, env, tl, vsr, tl) DEF_HELPER_4(lxvll, void, env, tl, vsr, tl) @@ -694,14 +695,12 @@ DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_NO_RWG, void, env, tl, tl) DEF_HELPER_1(msgsnd, void, tl) DEF_HELPER_2(msgclr, void, env, tl) -DEF_HELPER_1(book3s_msgsnd, void, tl) +DEF_HELPER_2(book3s_msgsnd, void, env, tl) DEF_HELPER_2(book3s_msgclr, void, env, tl) #endif DEF_HELPER_4(dlmzb, tl, env, tl, tl, i32) #if !defined(CONFIG_USER_ONLY) -DEF_HELPER_2(rac, tl, env, tl) - DEF_HELPER_2(load_dcr, tl, env, tl) DEF_HELPER_3(store_dcr, void, env, tl, tl) #endif @@ -729,6 +728,9 @@ DEF_HELPER_2(book3s_msgsndp, void, env, tl) DEF_HELPER_2(book3s_msgclrp, void, env, tl) DEF_HELPER_1(load_tfmr, tl, env) DEF_HELPER_2(store_tfmr, void, env, tl) +DEF_HELPER_FLAGS_2(store_sprc, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_1(load_sprd, TCG_CALL_NO_RWG_SE, tl, env) +DEF_HELPER_FLAGS_2(store_sprd, TCG_CALL_NO_RWG, void, env, tl) #endif DEF_HELPER_2(store_sdr1, void, env, tl) DEF_HELPER_2(store_pidr, void, env, tl) @@ -819,3 +821,11 @@ DEF_HELPER_4(DSCLIQ, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, 
env) DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env) + +#if !defined(CONFIG_USER_ONLY) +#if defined(TARGET_PPC64) +DEF_HELPER_1(clrbhrb, void, env) +DEF_HELPER_FLAGS_2(mfbhrbe, TCG_CALL_NO_WG, i64, env, i32) +DEF_HELPER_1(attn, noreturn, env) +#endif +#endif diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c index 25258986e3..02076e96fb 100644 --- a/target/ppc/helper_regs.c +++ b/target/ppc/helper_regs.c @@ -47,6 +47,39 @@ void hreg_swap_gpr_tgpr(CPUPPCState *env) env->tgpr[3] = tmp; } +#if defined(TARGET_PPC64) +static bool hreg_check_bhrb_enable(CPUPPCState *env) +{ + bool pr = !!(env->msr & (1 << MSR_PR)); + target_long mmcr0; + bool fcp; + bool hv; + + /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */ + if ((env->insns_flags2 & PPC2_ISA310) && + ((env->spr[SPR_POWER_MMCRA] & MMCRA_BHRBRD) || !pr)) { + return false; + } + + /* Check for BHRB "frozen" conditions */ + mmcr0 = env->spr[SPR_POWER_MMCR0]; + fcp = !!(mmcr0 & MMCR0_FCP); + if (mmcr0 & MMCR0_FCPC) { + hv = !!(env->msr & (1ull << MSR_HV)); + if (fcp) { + if (hv && pr) { + return false; + } + } else if (!hv && pr) { + return false; + } + } else if (fcp && pr) { + return false; + } + return true; +} +#endif + static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState *env) { uint32_t hflags = 0; @@ -61,6 +94,9 @@ static uint32_t hreg_compute_pmu_hflags_value(CPUPPCState *env) if (env->spr[SPR_POWER_MMCR0] & MMCR0_PMCjCE) { hflags |= 1 << HFLAGS_PMCJCE; } + if (hreg_check_bhrb_enable(env)) { + hflags |= 1 << HFLAGS_BHRB_ENABLE; + } #ifndef CONFIG_USER_ONLY if (env->pmc_ins_cnt) { @@ -85,6 +121,7 @@ static uint32_t hreg_compute_pmu_hflags_mask(CPUPPCState *env) hflags_mask |= 1 << HFLAGS_PMCJCE; hflags_mask |= 1 << HFLAGS_INSN_CNT; hflags_mask |= 1 << HFLAGS_PMC_OTHER; + hflags_mask |= 1 << HFLAGS_BHRB_ENABLE; #endif return hflags_mask; } @@ -334,7 +371,7 @@ void check_tlb_flush(CPUPPCState *env, bool global) if (global && (env->tlb_need_flush & TLB_NEED_GLOBAL_FLUSH)) { env->tlb_need_flush &= ~TLB_NEED_GLOBAL_FLUSH; env->tlb_need_flush &= ~TLB_NEED_LOCAL_FLUSH; - tlb_flush_all_cpus(cs); + tlb_flush_all_cpus_synced(cs); return; } @@ -693,7 +730,6 @@ void register_6xx_7xx_soft_tlb(CPUPPCState *env, int nb_tlbs, int nb_ways) #if !defined(CONFIG_USER_ONLY) env->nb_tlb = nb_tlbs; env->nb_ways = nb_ways; - env->id_tlbs = 1; env->tlb_type = TLB_6XX; spr_register(env, SPR_DMISS, "DMISS", SPR_NOACCESS, SPR_NOACCESS, diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode index eada59f59f..ee33141476 100644 --- a/target/ppc/insn32.decode +++ b/target/ppc/insn32.decode @@ -20,12 +20,24 @@ &A frt fra frb frc rc:bool @A ...... frt:5 fra:5 frb:5 frc:5 ..... rc:1 &A +&A_tab frt fra frb rc:bool +@A_tab ...... frt:5 fra:5 frb:5 ..... ..... rc:1 &A_tab + +&A_tac frt fra frc rc:bool +@A_tac ...... frt:5 fra:5 ..... frc:5 ..... rc:1 &A_tac + &A_tb frt frb rc:bool @A_tb ...... frt:5 ..... frb:5 ..... ..... rc:1 &A_tb +&A_tab_bc rt ra rb bc +@A_tab_bc ...... rt:5 ra:5 rb:5 bc:5 ..... . &A_tab_bc + &D rt ra si:int64_t @D ...... rt:5 ra:5 si:s16 &D +&D_ui rt ra ui:uint64_t +@D_ui ...... rt:5 ra:5 ui:16 &D_ui + &D_bf bf l:bool ra imm @D_bfs ...... bf:3 . l:1 ra:5 imm:s16 &D_bf @D_bfu ...... bf:3 . l:1 ra:5 imm:16 &D_bf @@ -93,6 +105,9 @@ &X_sa rs ra @X_sa ...... rs:5 ra:5 ..... .......... . &X_sa +&X_sa_rc rs ra rc +@X_sa_rc ...... rs:5 ra:5 ..... .......... rc:1 &X_sa_rc + %x_frtp 22:4 !function=times_2 %x_frap 17:4 !function=times_2 %x_frbp 12:4 !function=times_2 @@ -124,6 +139,9 @@ &X_bf bf ra rb @X_bf ...... 
bf:3 .. ra:5 rb:5 .......... . &X_bf +&X_bf_b bf rb +@X_bf_b ...... bf:3 .. ..... rb:5 .......... . &X_bf_b + @X_bf_ap_bp ...... bf:3 .. ....0 ....0 .......... . &X_bf ra=%x_frap rb=%x_frbp @X_bf_a_bp ...... bf:3 .. ra:5 ....0 .......... . &X_bf rb=%x_frbp @@ -187,12 +205,18 @@ &X_a ra @X_a ...... ra:3 .. ..... ..... .......... . &X_a +&X_tl rt l +@X_tl ...... rt:5 ... l:2 ..... .......... . &X_tl + &XO rt ra rb oe:bool rc:bool @XO ...... rt:5 ra:5 rb:5 oe:1 ......... rc:1 &XO &XO_ta rt ra oe:bool rc:bool @XO_ta ...... rt:5 ra:5 ..... oe:1 ......... rc:1 &XO_ta +&XO_tab_rc rt ra rb rc:bool +@XO_tab_rc ...... rt:5 ra:5 rb:5 . ......... rc:1 &XO_tab_rc + %xx_xt 0:1 21:5 %xx_xb 1:1 11:5 %xx_xa 2:1 16:5 @@ -325,6 +349,19 @@ CMP 011111 ... - . ..... ..... 0000000000 - @X_bfl CMPL 011111 ... - . ..... ..... 0000100000 - @X_bfl CMPI 001011 ... - . ..... ................ @D_bfs CMPLI 001010 ... - . ..... ................ @D_bfu +CMPRB 011111 ... - . ..... ..... 0011000000 - @X_bfl +CMPEQB 011111 ... -- ..... ..... 0011100000 - @X_bf + +### Fixed-Point Trap Instructions + +TW 011111 ..... ..... ..... 0000000100 - @X +TD 011111 ..... ..... ..... 0001000100 - @X +TWI 000011 ..... ..... ................ @D +TDI 000010 ..... ..... ................ @D + +### Fixed-Point Select Instruction + +ISEL 011111 ..... ..... ..... ..... 01111 - @A_tab_bc ### Fixed-Point Arithmetic Instructions @@ -353,8 +390,73 @@ SUBFE 011111 ..... ..... ..... . 010001000 . @XO SUBFME 011111 ..... ..... ----- . 011101000 . @XO_ta SUBFZE 011111 ..... ..... ----- . 011001000 . @XO_ta +MULLI 000111 ..... ..... ................ @D +MULLW 011111 ..... ..... ..... 0 011101011 . @XO_tab_rc +MULLWO 011111 ..... ..... ..... 1 011101011 . @XO_tab_rc +MULHW 011111 ..... ..... ..... - 001001011 . @XO_tab_rc +MULHWU 011111 ..... ..... ..... - 000001011 . @XO_tab_rc + +DIVW 011111 ..... ..... ..... . 111101011 . @XO +DIVWU 011111 ..... ..... ..... . 111001011 . @XO +DIVWE 011111 ..... ..... ..... . 110101011 . @XO +DIVWEU 011111 ..... ..... ..... . 110001011 . @XO + +MODSW 011111 ..... ..... ..... 1100001011 - @X +MODUW 011111 ..... ..... ..... 0100001011 - @X +DARN 011111 ..... --- .. ----- 1011110011 - @X_tl +NEG 011111 ..... ..... ----- . 001101000 . @XO_ta + +MULLD 011111 ..... ..... ..... 0 011101001 . @XO_tab_rc +MULLDO 011111 ..... ..... ..... 1 011101001 . @XO_tab_rc +MULHD 011111 ..... ..... ..... - 001001001 . @XO_tab_rc +MULHDU 011111 ..... ..... ..... - 000001001 . @XO_tab_rc + +MADDLD 000100 ..... ..... ..... ..... 110011 @VA +MADDHD 000100 ..... ..... ..... ..... 110000 @VA +MADDHDU 000100 ..... ..... ..... ..... 110001 @VA + +DIVD 011111 ..... ..... ..... . 111101001 . @XO +DIVDU 011111 ..... ..... ..... . 111001001 . @XO +DIVDE 011111 ..... ..... ..... . 110101001 . @XO +DIVDEU 011111 ..... ..... ..... . 110001001 . @XO + +MODSD 011111 ..... ..... ..... 1100001001 - @X +MODUD 011111 ..... ..... ..... 0100001001 - @X + ## Fixed-Point Logical Instructions +ANDI_ 011100 ..... ..... ................ @D_ui +ANDIS_ 011101 ..... ..... ................ @D_ui +ORI 011000 ..... ..... ................ @D_ui +ORIS 011001 ..... ..... ................ @D_ui +XORI 011010 ..... ..... ................ @D_ui +XORIS 011011 ..... ..... ................ @D_ui + +AND 011111 ..... ..... ..... 0000011100 . @X_rc +ANDC 011111 ..... ..... ..... 0000111100 . @X_rc +NAND 011111 ..... ..... ..... 0111011100 . @X_rc +OR 011111 ..... ..... ..... 0110111100 . @X_rc +ORC 011111 ..... ..... ..... 0110011100 . @X_rc +NOR 011111 ..... ..... ..... 0001111100 . 
@X_rc +XOR 011111 ..... ..... ..... 0100111100 . @X_rc +EQV 011111 ..... ..... ..... 0100011100 . @X_rc +CMPB 011111 ..... ..... ..... 0111111100 . @X_rc + +EXTSB 011111 ..... ..... ----- 1110111010 . @X_sa_rc +EXTSH 011111 ..... ..... ----- 1110011010 . @X_sa_rc +EXTSW 011111 ..... ..... ----- 1111011010 . @X_sa_rc +CNTLZW 011111 ..... ..... ----- 0000011010 . @X_sa_rc +CNTTZW 011111 ..... ..... ----- 1000011010 . @X_sa_rc +CNTLZD 011111 ..... ..... ----- 0000111010 . @X_sa_rc +CNTTZD 011111 ..... ..... ----- 1000111010 . @X_sa_rc +POPCNTB 011111 ..... ..... ----- 0001111010 . @X_sa_rc + +POPCNTW 011111 ..... ..... ----- 0101111010 - @X_sa +POPCNTD 011111 ..... ..... ----- 0111111010 - @X_sa +PRTYW 011111 ..... ..... ----- 0010011010 - @X_sa +PRTYD 011111 ..... ..... ----- 0010111010 - @X_sa + +BPERMD 011111 ..... ..... ..... 0011111100 - @X CFUGED 011111 ..... ..... ..... 0011011100 - @X CNTLZDM 011111 ..... ..... ..... 0000111011 - @X CNTTZDM 011111 ..... ..... ..... 1000111011 - @X @@ -400,9 +502,42 @@ STFDUX 011111 ..... ...... .... 1011110111 - @X ### Floating-Point Arithmetic Instructions +FADD 111111 ..... ..... ..... ----- 10101 . @A_tab +FADDS 111011 ..... ..... ..... ----- 10101 . @A_tab + +FSUB 111111 ..... ..... ..... ----- 10100 . @A_tab +FSUBS 111011 ..... ..... ..... ----- 10100 . @A_tab + +FMUL 111111 ..... ..... ----- ..... 11001 . @A_tac +FMULS 111011 ..... ..... ----- ..... 11001 . @A_tac + +FDIV 111111 ..... ..... ..... ----- 10010 . @A_tab +FDIVS 111011 ..... ..... ..... ----- 10010 . @A_tab + FSQRT 111111 ..... ----- ..... ----- 10110 . @A_tb FSQRTS 111011 ..... ----- ..... ----- 10110 . @A_tb +FRE 111111 ..... ----- ..... ----- 11000 . @A_tb +FRES 111011 ..... ----- ..... ----- 11000 . @A_tb + +FRSQRTE 111111 ..... ----- ..... ----- 11010 . @A_tb +FRSQRTES 111011 ..... ----- ..... ----- 11010 . @A_tb + +FTDIV 111111 ... -- ..... ..... 0010000000 - @X_bf +FTSQRT 111111 ... -- ----- ..... 0010100000 - @X_bf_b + +FMADD 111111 ..... ..... ..... ..... 11101 . @A +FMADDS 111011 ..... ..... ..... ..... 11101 . @A + +FMSUB 111111 ..... ..... ..... ..... 11100 . @A +FMSUBS 111011 ..... ..... ..... ..... 11100 . @A + +FNMADD 111111 ..... ..... ..... ..... 11111 . @A +FNMADDS 111011 ..... ..... ..... ..... 11111 . @A + +FNMSUB 111111 ..... ..... ..... ..... 11110 . @A +FNMSUBS 111011 ..... ..... ..... ..... 11110 . @A + ### Floating-Point Select Instruction FSEL 111111 ..... ..... ..... ..... 10111 . @A @@ -526,6 +661,23 @@ DSCRIQ 111111 ..... ..... ...... 001100010 . @Z22_tap_sh_rc VPMSUMD 000100 ..... ..... ..... 10011001000 @VX +## Vector Load/Store Instructions + +LVEBX 011111 ..... ..... ..... 0000000111 - @X +LVEHX 011111 ..... ..... ..... 0000100111 - @X +LVEWX 011111 ..... ..... ..... 0001000111 - @X +LVX 011111 ..... ..... ..... 0001100111 - @X +LVXL 011111 ..... ..... ..... 0101100111 - @X + +STVEBX 011111 ..... ..... ..... 0010000111 - @X +STVEHX 011111 ..... ..... ..... 0010100111 - @X +STVEWX 011111 ..... ..... ..... 0011000111 - @X +STVX 011111 ..... ..... ..... 0011100111 - @X +STVXL 011111 ..... ..... ..... 0111100111 - @X + +LVSL 011111 ..... ..... ..... 0000000110 - @X +LVSR 011111 ..... ..... ..... 0000100110 - @X + ## Vector Integer Instructions VCMPEQUB 000100 ..... ..... ..... . 0000000110 @VC @@ -557,6 +709,17 @@ VCMPNEZW 000100 ..... ..... ..... . 0110000111 @VC VCMPSQ 000100 ... -- ..... ..... 00101000001 @VX_bf VCMPUQ 000100 ... -- ..... ..... 00100000001 @VX_bf +## Vector Integer Logical Instructions + +VAND 000100 ..... ..... ..... 
10000000100 @VX +VANDC 000100 ..... ..... ..... 10001000100 @VX +VNAND 000100 ..... ..... ..... 10110000100 @VX +VOR 000100 ..... ..... ..... 10010000100 @VX +VORC 000100 ..... ..... ..... 10101000100 @VX +VNOR 000100 ..... ..... ..... 10100000100 @VX +VXOR 000100 ..... ..... ..... 10011000100 @VX +VEQV 000100 ..... ..... ..... 11010000100 @VX + ## Vector Integer Average Instructions VAVGSB 000100 ..... ..... ..... 10100000010 @VX @@ -689,6 +852,28 @@ VEXTSD2Q 000100 ..... 11011 ..... 11000000010 @VX_tb VNEGD 000100 ..... 00111 ..... 11000000010 @VX_tb VNEGW 000100 ..... 00110 ..... 11000000010 @VX_tb +## Vector Integer Maximum/Minimum Instructions + +VMAXUB 000100 ..... ..... ..... 00000000010 @VX +VMAXUH 000100 ..... ..... ..... 00001000010 @VX +VMAXUW 000100 ..... ..... ..... 00010000010 @VX +VMAXUD 000100 ..... ..... ..... 00011000010 @VX + +VMAXSB 000100 ..... ..... ..... 00100000010 @VX +VMAXSH 000100 ..... ..... ..... 00101000010 @VX +VMAXSW 000100 ..... ..... ..... 00110000010 @VX +VMAXSD 000100 ..... ..... ..... 00111000010 @VX + +VMINUB 000100 ..... ..... ..... 01000000010 @VX +VMINUH 000100 ..... ..... ..... 01001000010 @VX +VMINUW 000100 ..... ..... ..... 01010000010 @VX +VMINUD 000100 ..... ..... ..... 01011000010 @VX + +VMINSB 000100 ..... ..... ..... 01100000010 @VX +VMINSH 000100 ..... ..... ..... 01101000010 @VX +VMINSW 000100 ..... ..... ..... 01110000010 @VX +VMINSD 000100 ..... ..... ..... 01111000010 @VX + ## Vector Mask Manipulation Instructions MTVSRBM 000100 ..... 10000 ..... 11001000010 @VX_tb @@ -998,3 +1183,22 @@ MSGSND 011111 ----- ----- ..... 0011001110 - @X_rb MSGCLRP 011111 ----- ----- ..... 0010101110 - @X_rb MSGSNDP 011111 ----- ----- ..... 0010001110 - @X_rb MSGSYNC 011111 ----- ----- ----- 1101110110 - + +# Memory Barrier Instructions + +&X_sync l sc +@X_sync ...... .. l:3 ... sc:2 ..... .......... . &X_sync +SYNC 011111 -- ... --- .. ----- 1001010110 - @X_sync +EIEIO 011111 ----- ----- ----- 1101010110 - + +# Branch History Rolling Buffer (BHRB) Instructions + +&XFX_bhrbe rt bhrbe +@XFX_bhrbe ...... rt:5 bhrbe:10 .......... - &XFX_bhrbe + +MFBHRBE 011111 ..... ..... ..... 
0100101110 - @XFX_bhrbe +CLRBHRB 011111 ----- ----- ----- 0110101110 - + +## Misc POWER instructions + +ATTN 000000 00000 00000 00000 0100000000 0 diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c index 0a5c3e78a4..2c6b633d65 100644 --- a/target/ppc/int_helper.c +++ b/target/ppc/int_helper.c @@ -44,7 +44,7 @@ static inline void helper_update_ov_legacy(CPUPPCState *env, int ov) } } -target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWEU(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { uint64_t rt = 0; @@ -71,7 +71,7 @@ target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb, return (target_ulong)rt; } -target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, +target_ulong helper_DIVWE(CPUPPCState *env, target_ulong ra, target_ulong rb, uint32_t oe) { int64_t rt = 0; @@ -101,7 +101,7 @@ target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb, #if defined(TARGET_PPC64) -uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) +uint64_t helper_DIVDEU(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) { uint64_t rt = 0; int overflow = 0; @@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe) return rt; } -uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) +uint64_t helper_DIVDE(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) { uint64_t rt = 0; int64_t ra = (int64_t)rau; @@ -159,7 +159,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe) /* When you XOR the pattern and there is a match, that byte will be zero */ #define hasvalue(x, n) (haszero((x) ^ pattern(n))) -uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) +uint32_t helper_CMPEQB(target_ulong ra, target_ulong rb) { return hasvalue(rb, ra) ? CRF_GT : 0; } @@ -171,7 +171,7 @@ uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb) /* * Return a random number. */ -uint64_t helper_darn32(void) +uint64_t helper_DARN32(void) { Error *err = NULL; uint32_t ret; @@ -186,7 +186,7 @@ uint64_t helper_darn32(void) return ret; } -uint64_t helper_darn64(void) +uint64_t helper_DARN64(void) { Error *err = NULL; uint64_t ret; @@ -201,7 +201,7 @@ uint64_t helper_darn64(void) return ret; } -uint64_t helper_bpermd(uint64_t rs, uint64_t rb) +uint64_t helper_BPERMD(uint64_t rs, uint64_t rb) { int i; uint64_t ra = 0; @@ -219,7 +219,7 @@ uint64_t helper_bpermd(uint64_t rs, uint64_t rb) #endif -target_ulong helper_cmpb(target_ulong rs, target_ulong rb) +target_ulong helper_CMPB(target_ulong rs, target_ulong rb) { target_ulong mask = 0xff; target_ulong ra = 0; @@ -288,7 +288,7 @@ target_ulong helper_srad(CPUPPCState *env, target_ulong value, #endif #if defined(TARGET_PPC64) -target_ulong helper_popcntb(target_ulong val) +target_ulong helper_POPCNTB(target_ulong val) { /* Note that we don't fold past bytes */ val = (val & 0x5555555555555555ULL) + ((val >> 1) & @@ -300,7 +300,7 @@ target_ulong helper_popcntb(target_ulong val) return val; } -target_ulong helper_popcntw(target_ulong val) +target_ulong helper_POPCNTW(target_ulong val) { /* Note that we don't fold past words. 
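The POPCNTB/POPCNTW helpers use the standard SWAR folds with the 0x55/0x33/0x0f byte patterns, stopping at byte or word granularity as their comments note. As a side illustration of the same technique on a single byte (not part of the patch):

/* Illustration only: the fold sequence of helper_POPCNTB applied to one byte. */
static inline unsigned popcnt8_example(uint8_t v)
{
    v = (v & 0x55) + ((v >> 1) & 0x55);   /* sum adjacent bit pairs   */
    v = (v & 0x33) + ((v >> 2) & 0x33);   /* sum adjacent 2-bit sums  */
    v = (v & 0x0f) + ((v >> 4) & 0x0f);   /* sum the two nibble sums  */
    return v;
}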
*/ val = (val & 0x5555555555555555ULL) + ((val >> 1) & @@ -316,7 +316,7 @@ target_ulong helper_popcntw(target_ulong val) return val; } #else -target_ulong helper_popcntb(target_ulong val) +target_ulong helper_POPCNTB(target_ulong val) { /* Note that we don't fold past bytes */ val = (val & 0x55555555) + ((val >> 1) & 0x55555555); diff --git a/target/ppc/internal.h b/target/ppc/internal.h index 5b20ecbd33..20fb2ec593 100644 --- a/target/ppc/internal.h +++ b/target/ppc/internal.h @@ -18,7 +18,9 @@ #ifndef PPC_INTERNAL_H #define PPC_INTERNAL_H +#include "exec/breakpoint.h" #include "hw/registerfields.h" +#include "exec/page-protection.h" /* PM instructions */ typedef enum { @@ -232,51 +234,23 @@ void destroy_ppc_opcodes(PowerPCCPU *cpu); void ppc_gdb_init(CPUState *cs, PowerPCCPUClass *ppc); const gchar *ppc_gdb_arch_name(CPUState *cs); -/** - * prot_for_access_type: - * @access_type: Access type - * - * Return the protection bit required for the given access type. - */ -static inline int prot_for_access_type(MMUAccessType access_type) +#ifndef CONFIG_USER_ONLY + +/* Check if permission bit required for the access_type is set in prot */ +static inline int check_prot_access_type(int prot, MMUAccessType access_type) { - switch (access_type) { - case MMU_INST_FETCH: - return PAGE_EXEC; - case MMU_DATA_LOAD: - return PAGE_READ; - case MMU_DATA_STORE: - return PAGE_WRITE; - } - g_assert_not_reached(); + return prot & (1 << access_type); } -#ifndef CONFIG_USER_ONLY - /* PowerPC MMU emulation */ -typedef struct mmu_ctx_t mmu_ctx_t; - bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr *raddrp, int *psizep, int *protp, int mmu_idx, bool guest_visible); -int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, - MMUAccessType access_type, int type, - int mmu_idx); + /* Software driven TLB helpers */ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, int way, int is_code); -/* Context used internally during MMU translations */ -struct mmu_ctx_t { - hwaddr raddr; /* Real address */ - hwaddr eaddr; /* Effective address */ - int prot; /* Protection bits */ - hwaddr hash[2]; /* Pagetable hash values */ - target_ulong ptem; /* Virtual segment ID | API */ - int key; /* Access key */ - int nx; /* Non-execute area */ -}; #endif /* !CONFIG_USER_ONLY */ diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 63930d4a77..005f2239f3 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -49,6 +49,8 @@ #include "elf.h" #include "sysemu/kvm_int.h" +#include CONFIG_DEVICES + #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/" #define DEBUG_RETURN_GUEST 0 @@ -71,7 +73,6 @@ static int cap_hior; static int cap_one_reg; static int cap_epr; static int cap_ppc_watchdog; -static int cap_papr; static int cap_htab_fd; static int cap_fixup_hcalls; static int cap_htm; /* Hardware transactional memory support */ @@ -90,6 +91,12 @@ static int cap_fwnmi; static int cap_rpt_invalidate; static int cap_ail_mode_3; +#ifdef CONFIG_PSERIES +static int cap_papr; +#else +#define cap_papr (0) +#endif + static uint32_t debug_inst_opcode; /* @@ -858,9 +865,7 @@ int kvmppc_put_books_sregs(PowerPCCPU *cpu) sregs.pvr = env->spr[SPR_PVR]; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp); + sregs.u.s.sdr1 = cpu->vhyp_class->encode_hpt_for_kvm_pr(cpu->vhyp); } else { sregs.u.s.sdr1 = env->spr[SPR_SDR1]; } @@ -1668,7 +1673,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run 
*run) trace_kvm_handle_halt(); ret = kvmppc_handle_halt(cpu); break; -#if defined(TARGET_PPC64) +#if defined(CONFIG_PSERIES) case KVM_EXIT_PAPR_HCALL: trace_kvm_handle_papr_hcall(run->papr_hcall.nr); run->papr_hcall.ret = spapr_hypercall(cpu, @@ -1698,7 +1703,7 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ret = 0; break; -#if defined(TARGET_PPC64) +#if defined(CONFIG_PSERIES) case KVM_EXIT_NMI: trace_kvm_handle_nmi_exception(); ret = kvm_handle_nmi(cpu, run); @@ -2054,6 +2059,7 @@ void kvmppc_enable_h_rpt_invalidate(void) kvmppc_enable_hcall(kvm_state, H_RPT_INVALIDATE); } +#ifdef CONFIG_PSERIES void kvmppc_set_papr(PowerPCCPU *cpu) { CPUState *cs = CPU(cpu); @@ -2075,6 +2081,7 @@ void kvmppc_set_papr(PowerPCCPU *cpu) */ cap_papr = 1; } +#endif int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr) { @@ -2837,7 +2844,7 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) return data & 0xffff; } -#if defined(TARGET_PPC64) +#if defined(CONFIG_PSERIES) int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) { uint16_t flags = run->flags & KVM_RUN_PPC_NMI_DISP_MASK; diff --git a/target/ppc/machine.c b/target/ppc/machine.c index 203fe28e01..731dd8df35 100644 --- a/target/ppc/machine.c +++ b/target/ppc/machine.c @@ -333,7 +333,7 @@ static int cpu_post_load(void *opaque, int version_id) * triggered types (including HDEC) would need to carry more state. */ cpu_ppc_store_decr(env, env->spr[SPR_DECR]); - pmu_mmcr01_updated(env); + pmu_mmcr01a_updated(env); } return 0; @@ -711,6 +711,26 @@ static const VMStateDescription vmstate_reservation = { } }; +#ifdef TARGET_PPC64 +static bool bhrb_needed(void *opaque) +{ + PowerPCCPU *cpu = opaque; + return (cpu->env.flags & POWERPC_FLAG_BHRB) != 0; +} + +static const VMStateDescription vmstate_bhrb = { + .name = "cpu/bhrb", + .version_id = 1, + .minimum_version_id = 1, + .needed = bhrb_needed, + .fields = (VMStateField[]) { + VMSTATE_UINTTL(env.bhrb_offset, PowerPCCPU), + VMSTATE_UINT64_ARRAY(env.bhrb, PowerPCCPU, BHRB_MAX_NUM_ENTRIES), + VMSTATE_END_OF_LIST() + } +}; +#endif + const VMStateDescription vmstate_ppc_cpu = { .name = "cpu", .version_id = 5, @@ -756,6 +776,7 @@ const VMStateDescription vmstate_ppc_cpu = { #ifdef TARGET_PPC64 &vmstate_tm, &vmstate_slb, + &vmstate_bhrb, #endif /* TARGET_PPC64 */ &vmstate_tlb6xx, &vmstate_tlbemb, diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c index ea7e8443a8..f88155ad45 100644 --- a/target/ppc/mem_helper.c +++ b/target/ppc/mem_helper.c @@ -404,9 +404,9 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg, } \ } #define I(x) (x) -LVE(lvebx, cpu_ldub_data_ra, I, u8) -LVE(lvehx, cpu_lduw_data_ra, bswap16, u16) -LVE(lvewx, cpu_ldl_data_ra, bswap32, u32) +LVE(LVEBX, cpu_ldub_data_ra, I, u8) +LVE(LVEHX, cpu_lduw_data_ra, bswap16, u16) +LVE(LVEWX, cpu_ldl_data_ra, bswap32, u32) #undef I #undef LVE @@ -432,9 +432,9 @@ LVE(lvewx, cpu_ldl_data_ra, bswap32, u32) } \ } #define I(x) (x) -STVE(stvebx, cpu_stb_data_ra, I, u8) -STVE(stvehx, cpu_stw_data_ra, bswap16, u16) -STVE(stvewx, cpu_stl_data_ra, bswap32, u32) +STVE(STVEBX, cpu_stb_data_ra, I, u8) +STVE(STVEHX, cpu_stw_data_ra, bswap16, u16) +STVE(STVEWX, cpu_stl_data_ra, bswap32, u32) #undef I #undef LVE diff --git a/target/ppc/meson.build b/target/ppc/meson.build index 0b89f9b89f..db3b7a0c33 100644 --- a/target/ppc/meson.build +++ b/target/ppc/meson.build @@ -37,6 +37,7 @@ ppc_system_ss.add(files( 'arch_dump.c', 'machine.c', 'mmu-hash32.c', + 'mmu-booke.c', 'mmu_common.c', 'ppc-qmp-cmds.c', )) diff --git 
a/target/ppc/misc_helper.c b/target/ppc/misc_helper.c index 58e808dc96..fa47be2298 100644 --- a/target/ppc/misc_helper.c +++ b/target/ppc/misc_helper.c @@ -150,6 +150,17 @@ void helper_msr_facility_check(CPUPPCState *env, uint32_t bit, #if !defined(CONFIG_USER_ONLY) +#ifdef TARGET_PPC64 +static void helper_mmcr0_facility_check(CPUPPCState *env, uint32_t bit, + uint32_t sprn, uint32_t cause) +{ + if (FIELD_EX64(env->msr, MSR, PR) && + !(env->spr[SPR_POWER_MMCR0] & (1ULL << bit))) { + raise_fu_exception(env, bit, sprn, cause, GETPC()); + } +} +#endif + void helper_store_sdr1(CPUPPCState *env, target_ulong val) { if (env->spr[SPR_SDR1] != val) { @@ -162,6 +173,7 @@ void helper_store_sdr1(CPUPPCState *env, target_ulong val) void helper_store_ptcr(CPUPPCState *env, target_ulong val) { if (env->spr[SPR_PTCR] != val) { + CPUState *cs = env_cpu(env); PowerPCCPU *cpu = env_archcpu(env); target_ulong ptcr_mask = PTCR_PATB | PTCR_PATS; target_ulong patbsize = val & PTCR_PATS; @@ -183,8 +195,19 @@ void helper_store_ptcr(CPUPPCState *env, target_ulong val) return; } - env->spr[SPR_PTCR] = val; - tlb_flush(env_cpu(env)); + if (cs->nr_threads == 1 || !(env->flags & POWERPC_FLAG_SMT_1LPAR)) { + env->spr[SPR_PTCR] = val; + tlb_flush(cs); + } else { + CPUState *ccs; + + THREAD_SIBLING_FOREACH(cs, ccs) { + PowerPCCPU *ccpu = POWERPC_CPU(ccs); + CPUPPCState *cenv = &ccpu->env; + cenv->spr[SPR_PTCR] = val; + tlb_flush(ccs); + } + } } } @@ -284,6 +307,72 @@ void helper_store_dpdes(CPUPPCState *env, target_ulong val) } bql_unlock(); } + +/* Indirect SCOM (SPRC/SPRD) access to SCRATCH0-7 are implemented. */ +void helper_store_sprc(CPUPPCState *env, target_ulong val) +{ + if (val & ~0x3f8ULL) { + qemu_log_mask(LOG_GUEST_ERROR, "Invalid SPRC register value " + TARGET_FMT_lx"\n", val); + return; + } + env->spr[SPR_POWER_SPRC] = val; +} + +target_ulong helper_load_sprd(CPUPPCState *env) +{ + target_ulong sprc = env->spr[SPR_POWER_SPRC]; + + switch (sprc & 0x3c0) { + case 0: /* SCRATCH0-7 */ + return env->scratch[(sprc >> 3) & 0x7]; + default: + qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" + TARGET_FMT_lx"\n", sprc); + break; + } + return 0; +} + +static void do_store_scratch(CPUPPCState *env, int nr, target_ulong val) +{ + CPUState *cs = env_cpu(env); + CPUState *ccs; + uint32_t nr_threads = cs->nr_threads; + + /* + * Log stores to SCRATCH, because some firmware uses these for debugging + * and logging, but they would normally be read by the BMC, which is + * not implemented in QEMU yet. This gives a way to get at the information. + * Could also dump these upon checkstop. 
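The SPRC/SPRD pair added in this hunk is an indirect register window: helper_store_sprc accepts only values within the 0x3f8 mask, bits 9:6 select the register group (only group 0, the SCRATCH0-7 registers, is implemented), and bits 5:3 select the register within the group. A small restatement of that decode; the helper name is illustrative only:

static int sprc_to_scratch_index(target_ulong sprc)
{
    /* helper_store_sprc() has already rejected bits outside 0x3f8 */
    g_assert((sprc & 0x3c0) == 0);       /* group 0: SCRATCH0-7         */
    return (sprc >> 3) & 0x7;            /* e.g. SPRC 0x018 -> SCRATCH3 */
}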
+ */ + qemu_log("SPRD write 0x" TARGET_FMT_lx " to SCRATCH%d\n", val, nr); + + if (nr_threads == 1) { + env->scratch[nr] = val; + return; + } + + THREAD_SIBLING_FOREACH(cs, ccs) { + CPUPPCState *cenv = &POWERPC_CPU(ccs)->env; + cenv->scratch[nr] = val; + } +} + +void helper_store_sprd(CPUPPCState *env, target_ulong val) +{ + target_ulong sprc = env->spr[SPR_POWER_SPRC]; + + switch (sprc & 0x3c0) { + case 0: /* SCRATCH0-7 */ + do_store_scratch(env, (sprc >> 3) & 0x7, val); + break; + default: + qemu_log_mask(LOG_UNIMP, "mfSPRD: Unimplemented SPRC:0x" + TARGET_FMT_lx"\n", sprc); + break; + } +} #endif /* defined(TARGET_PPC64) */ void helper_store_pidr(CPUPPCState *env, target_ulong val) @@ -363,3 +452,42 @@ void helper_fixup_thrm(CPUPPCState *env) env->spr[i] = v; } } + +#if !defined(CONFIG_USER_ONLY) +#if defined(TARGET_PPC64) +void helper_clrbhrb(CPUPPCState *env) +{ + helper_hfscr_facility_check(env, HFSCR_BHRB, "clrbhrb", FSCR_IC_BHRB); + + helper_mmcr0_facility_check(env, MMCR0_BHRBA_NR, 0, FSCR_IC_BHRB); + + if (env->flags & POWERPC_FLAG_BHRB) { + memset(env->bhrb, 0, sizeof(env->bhrb)); + } +} + +uint64_t helper_mfbhrbe(CPUPPCState *env, uint32_t bhrbe) +{ + unsigned int index; + + helper_hfscr_facility_check(env, HFSCR_BHRB, "mfbhrbe", FSCR_IC_BHRB); + + helper_mmcr0_facility_check(env, MMCR0_BHRBA_NR, 0, FSCR_IC_BHRB); + + if (!(env->flags & POWERPC_FLAG_BHRB) || + (bhrbe >= env->bhrb_num_entries) || + (env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) { + return 0; + } + + /* + * Note: bhrb_offset is the byte offset for writing the + * next entry (over the oldest entry), which is why we + * must offset bhrbe by 1 to get to the 0th entry. + */ + index = ((env->bhrb_offset / sizeof(uint64_t)) - (bhrbe + 1)) % + env->bhrb_num_entries; + return env->bhrb[index]; +} +#endif +#endif diff --git a/target/ppc/mmu-book3s-v3.h b/target/ppc/mmu-book3s-v3.h index 674377a19e..f3f7993958 100644 --- a/target/ppc/mmu-book3s-v3.h +++ b/target/ppc/mmu-book3s-v3.h @@ -108,9 +108,7 @@ static inline hwaddr ppc_hash64_hpt_mask(PowerPCCPU *cpu) uint64_t base; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - return vhc->hpt_mask(cpu->vhyp); + return cpu->vhyp_class->hpt_mask(cpu->vhyp); } if (cpu->env.mmu_model == POWERPC_MMU_3_00) { ppc_v3_pate_t pate; diff --git a/target/ppc/mmu-booke.c b/target/ppc/mmu-booke.c new file mode 100644 index 0000000000..55e5dd7c6b --- /dev/null +++ b/target/ppc/mmu-booke.c @@ -0,0 +1,531 @@ +/* + * PowerPC BookE MMU, TLB emulation helpers for QEMU. + * + * Copyright (c) 2003-2007 Jocelyn Mayer + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
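The new mmu-booke.c below, like the reworked hash MMU code, relies on check_prot_access_type() from internal.h, and that helper works only because of the relation asserted in ppc_cpu_class_init() earlier in this diff: MMU_DATA_LOAD/MMU_DATA_STORE/MMU_INST_FETCH are 0/1/2 while PAGE_READ/PAGE_WRITE/PAGE_EXEC are 1/2/4, so the required permission bit is simply 1 << access_type. Worked out:

/* check_prot_access_type(prot, access_type) == prot & (1 << access_type)
 *   MMU_DATA_LOAD  (0) -> 1 << 0 == PAGE_READ  (1)
 *   MMU_DATA_STORE (1) -> 1 << 1 == PAGE_WRITE (2)
 *   MMU_INST_FETCH (2) -> 1 << 2 == PAGE_EXEC  (4)
 * e.g. a store to a read+execute page:
 *   check_prot_access_type(PAGE_READ | PAGE_EXEC, MMU_DATA_STORE)
 *     == (1 | 4) & 2 == 0, so the access is refused. */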
+ */ + +#include "qemu/osdep.h" +#include "exec/page-protection.h" +#include "exec/log.h" +#include "cpu.h" +#include "internal.h" +#include "mmu-booke.h" + +/* Generic TLB check function for embedded PowerPC implementations */ +static bool ppcemb_tlb_check(CPUPPCState *env, ppcemb_tlb_t *tlb, + hwaddr *raddrp, + target_ulong address, uint32_t pid, int i) +{ + target_ulong mask; + + /* Check valid flag */ + if (!(tlb->prot & PAGE_VALID)) { + return false; + } + mask = ~(tlb->size - 1); + qemu_log_mask(CPU_LOG_MMU, "%s: TLB %d address " TARGET_FMT_lx + " PID %u <=> " TARGET_FMT_lx " " TARGET_FMT_lx " %u %x\n", + __func__, i, address, pid, tlb->EPN, + mask, (uint32_t)tlb->PID, tlb->prot); + /* Check PID */ + if (tlb->PID != 0 && tlb->PID != pid) { + return false; + } + /* Check effective address */ + if ((address & mask) != tlb->EPN) { + return false; + } + *raddrp = (tlb->RPN & mask) | (address & ~mask); + return true; +} + +/* Generic TLB search function for PowerPC embedded implementations */ +int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid) +{ + ppcemb_tlb_t *tlb; + hwaddr raddr; + int i; + + for (i = 0; i < env->nb_tlb; i++) { + tlb = &env->tlb.tlbe[i]; + if (ppcemb_tlb_check(env, tlb, &raddr, address, pid, i)) { + return i; + } + } + return -1; +} + +int mmu40x_get_physical_address(CPUPPCState *env, hwaddr *raddr, int *prot, + target_ulong address, + MMUAccessType access_type) +{ + ppcemb_tlb_t *tlb; + int i, ret, zsel, zpr, pr; + + ret = -1; + pr = FIELD_EX64(env->msr, MSR, PR); + for (i = 0; i < env->nb_tlb; i++) { + tlb = &env->tlb.tlbe[i]; + if (!ppcemb_tlb_check(env, tlb, raddr, address, + env->spr[SPR_40x_PID], i)) { + continue; + } + zsel = (tlb->attr >> 4) & 0xF; + zpr = (env->spr[SPR_40x_ZPR] >> (30 - (2 * zsel))) & 0x3; + qemu_log_mask(CPU_LOG_MMU, + "%s: TLB %d zsel %d zpr %d ty %d attr %08x\n", + __func__, i, zsel, zpr, access_type, tlb->attr); + /* Check execute enable bit */ + switch (zpr) { + case 0x2: + if (pr != 0) { + goto check_perms; + } + /* fall through */ + case 0x3: + /* All accesses granted */ + *prot = PAGE_RWX; + ret = 0; + break; + + case 0x0: + if (pr != 0) { + /* Raise Zone protection fault. */ + env->spr[SPR_40x_ESR] = 1 << 22; + *prot = 0; + ret = -2; + break; + } + /* fall through */ + case 0x1: +check_perms: + /* Check from TLB entry */ + *prot = tlb->prot; + if (check_prot_access_type(*prot, access_type)) { + ret = 0; + } else { + env->spr[SPR_40x_ESR] = 0; + ret = -2; + } + break; + } + } + qemu_log_mask(CPU_LOG_MMU, "%s: access %s " TARGET_FMT_lx " => " + HWADDR_FMT_plx " %d %d\n", __func__, + ret < 0 ? "refused" : "granted", address, + ret < 0 ? 
0 : *raddr, *prot, ret); + + return ret; +} + +static bool mmubooke_check_pid(CPUPPCState *env, ppcemb_tlb_t *tlb, + hwaddr *raddr, target_ulong addr, int i) +{ + if (ppcemb_tlb_check(env, tlb, raddr, addr, env->spr[SPR_BOOKE_PID], i)) { + if (!env->nb_pids) { + /* Extend the physical address to 36 bits */ + *raddr |= (uint64_t)(tlb->RPN & 0xF) << 32; + } + return true; + } else if (!env->nb_pids) { + return false; + } + if (env->spr[SPR_BOOKE_PID1] && + ppcemb_tlb_check(env, tlb, raddr, addr, env->spr[SPR_BOOKE_PID1], i)) { + return true; + } + if (env->spr[SPR_BOOKE_PID2] && + ppcemb_tlb_check(env, tlb, raddr, addr, env->spr[SPR_BOOKE_PID2], i)) { + return true; + } + return false; +} + +static int mmubooke_check_tlb(CPUPPCState *env, ppcemb_tlb_t *tlb, + hwaddr *raddr, int *prot, target_ulong address, + MMUAccessType access_type, int i) +{ + if (!mmubooke_check_pid(env, tlb, raddr, address, i)) { + qemu_log_mask(CPU_LOG_MMU, "%s: TLB entry not found\n", __func__); + return -1; + } + + /* Check the address space */ + if ((access_type == MMU_INST_FETCH ? + FIELD_EX64(env->msr, MSR, IR) : + FIELD_EX64(env->msr, MSR, DR)) != (tlb->attr & 1)) { + qemu_log_mask(CPU_LOG_MMU, "%s: AS doesn't match\n", __func__); + return -1; + } + + if (FIELD_EX64(env->msr, MSR, PR)) { + *prot = tlb->prot & 0xF; + } else { + *prot = (tlb->prot >> 4) & 0xF; + } + if (check_prot_access_type(*prot, access_type)) { + qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__); + return 0; + } + + qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, *prot); + return access_type == MMU_INST_FETCH ? -3 : -2; +} + +static int mmubooke_get_physical_address(CPUPPCState *env, hwaddr *raddr, + int *prot, target_ulong address, + MMUAccessType access_type) +{ + ppcemb_tlb_t *tlb; + int i, ret = -1; + + for (i = 0; i < env->nb_tlb; i++) { + tlb = &env->tlb.tlbe[i]; + ret = mmubooke_check_tlb(env, tlb, raddr, prot, address, + access_type, i); + if (ret != -1) { + break; + } + } + qemu_log_mask(CPU_LOG_MMU, + "%s: access %s " TARGET_FMT_lx " => " HWADDR_FMT_plx + " %d %d\n", __func__, ret < 0 ? "refused" : "granted", + address, ret < 0 ? -1 : *raddr, ret == -1 ? 
0 : *prot, ret); + return ret; +} + +hwaddr booke206_tlb_to_page_size(CPUPPCState *env, ppcmas_tlb_t *tlb) +{ + int tlbm_size; + + tlbm_size = (tlb->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + + return 1024ULL << tlbm_size; +} + +/* TLB check function for MAS based SoftTLBs */ +int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, hwaddr *raddrp, + target_ulong address, uint32_t pid) +{ + hwaddr mask; + uint32_t tlb_pid; + + if (!FIELD_EX64(env->msr, MSR, CM)) { + /* In 32bit mode we can only address 32bit EAs */ + address = (uint32_t)address; + } + + /* Check valid flag */ + if (!(tlb->mas1 & MAS1_VALID)) { + return -1; + } + + mask = ~(booke206_tlb_to_page_size(env, tlb) - 1); + qemu_log_mask(CPU_LOG_MMU, "%s: TLB ADDR=0x" TARGET_FMT_lx + " PID=0x%x MAS1=0x%x MAS2=0x%" PRIx64 " mask=0x%" + HWADDR_PRIx " MAS7_3=0x%" PRIx64 " MAS8=0x%" PRIx32 "\n", + __func__, address, pid, tlb->mas1, tlb->mas2, mask, + tlb->mas7_3, tlb->mas8); + + /* Check PID */ + tlb_pid = (tlb->mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT; + if (tlb_pid != 0 && tlb_pid != pid) { + return -1; + } + + /* Check effective address */ + if ((address & mask) != (tlb->mas2 & MAS2_EPN_MASK)) { + return -1; + } + + if (raddrp) { + *raddrp = (tlb->mas7_3 & mask) | (address & ~mask); + } + + return 0; +} + +static bool is_epid_mmu(int mmu_idx) +{ + return mmu_idx == PPC_TLB_EPID_STORE || mmu_idx == PPC_TLB_EPID_LOAD; +} + +static uint32_t mmubooke206_esr(int mmu_idx, MMUAccessType access_type) +{ + uint32_t esr = 0; + if (access_type == MMU_DATA_STORE) { + esr |= ESR_ST; + } + if (is_epid_mmu(mmu_idx)) { + esr |= ESR_EPID; + } + return esr; +} + +/* + * Get EPID register given the mmu_idx. If this is regular load, + * construct the EPID access bits from current processor state + * + * Get the effective AS and PR bits and the PID. The PID is returned + * only if EPID load is requested, otherwise the caller must detect + * the correct EPID. Return true if valid EPID is returned. 
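mmubooke206_get_as() below chooses between the normal translation context (SPR_BOOKE_PID plus MSR[DS]/MSR[PR]) and the external-PID context taken from EPLC for loads or EPSC for stores. A minimal restatement of the EPLC decode using the same masks; the function name is illustrative only:

/* Illustration only: context selection for an external-PID load,
 * mirroring the EPLC decode in mmubooke206_get_as(). */
static void epid_load_context_example(CPUPPCState *env,
                                      uint32_t *pid, bool *as, bool *pr)
{
    uint32_t eplc = env->spr[SPR_BOOKE_EPLC];

    *pid = (eplc & EPID_EPID) >> EPID_EPID_SHIFT;
    *as  = !!(eplc & EPID_EAS);
    *pr  = !!(eplc & EPID_EPR);
}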
+ */ +static bool mmubooke206_get_as(CPUPPCState *env, + int mmu_idx, uint32_t *epid_out, + bool *as_out, bool *pr_out) +{ + if (is_epid_mmu(mmu_idx)) { + uint32_t epidr; + if (mmu_idx == PPC_TLB_EPID_STORE) { + epidr = env->spr[SPR_BOOKE_EPSC]; + } else { + epidr = env->spr[SPR_BOOKE_EPLC]; + } + *epid_out = (epidr & EPID_EPID) >> EPID_EPID_SHIFT; + *as_out = !!(epidr & EPID_EAS); + *pr_out = !!(epidr & EPID_EPR); + return true; + } else { + *as_out = FIELD_EX64(env->msr, MSR, DS); + *pr_out = FIELD_EX64(env->msr, MSR, PR); + return false; + } +} + +/* Check if the tlb found by hashing really matches */ +static int mmubooke206_check_tlb(CPUPPCState *env, ppcmas_tlb_t *tlb, + hwaddr *raddr, int *prot, + target_ulong address, + MMUAccessType access_type, int mmu_idx) +{ + uint32_t epid; + bool as, pr; + bool use_epid = mmubooke206_get_as(env, mmu_idx, &epid, &as, &pr); + + if (!use_epid) { + if (ppcmas_tlb_check(env, tlb, raddr, address, + env->spr[SPR_BOOKE_PID]) >= 0) { + goto found_tlb; + } + + if (env->spr[SPR_BOOKE_PID1] && + ppcmas_tlb_check(env, tlb, raddr, address, + env->spr[SPR_BOOKE_PID1]) >= 0) { + goto found_tlb; + } + + if (env->spr[SPR_BOOKE_PID2] && + ppcmas_tlb_check(env, tlb, raddr, address, + env->spr[SPR_BOOKE_PID2]) >= 0) { + goto found_tlb; + } + } else { + if (ppcmas_tlb_check(env, tlb, raddr, address, epid) >= 0) { + goto found_tlb; + } + } + + qemu_log_mask(CPU_LOG_MMU, "%s: No TLB entry found for effective address " + "0x" TARGET_FMT_lx "\n", __func__, address); + return -1; + +found_tlb: + + /* Check the address space and permissions */ + if (access_type == MMU_INST_FETCH) { + /* There is no way to fetch code using epid load */ + assert(!use_epid); + as = FIELD_EX64(env->msr, MSR, IR); + } + + if (as != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) { + qemu_log_mask(CPU_LOG_MMU, "%s: AS doesn't match\n", __func__); + return -1; + } + + *prot = 0; + if (pr) { + if (tlb->mas7_3 & MAS3_UR) { + *prot |= PAGE_READ; + } + if (tlb->mas7_3 & MAS3_UW) { + *prot |= PAGE_WRITE; + } + if (tlb->mas7_3 & MAS3_UX) { + *prot |= PAGE_EXEC; + } + } else { + if (tlb->mas7_3 & MAS3_SR) { + *prot |= PAGE_READ; + } + if (tlb->mas7_3 & MAS3_SW) { + *prot |= PAGE_WRITE; + } + if (tlb->mas7_3 & MAS3_SX) { + *prot |= PAGE_EXEC; + } + } + if (check_prot_access_type(*prot, access_type)) { + qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__); + return 0; + } + + qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, *prot); + return access_type == MMU_INST_FETCH ? -3 : -2; +} + +static int mmubooke206_get_physical_address(CPUPPCState *env, hwaddr *raddr, + int *prot, target_ulong address, + MMUAccessType access_type, + int mmu_idx) +{ + ppcmas_tlb_t *tlb; + int i, j, ret = -1; + + for (i = 0; i < BOOKE206_MAX_TLBN; i++) { + int ways = booke206_tlb_ways(env, i); + for (j = 0; j < ways; j++) { + tlb = booke206_get_tlbm(env, i, address, j); + if (!tlb) { + continue; + } + ret = mmubooke206_check_tlb(env, tlb, raddr, prot, address, + access_type, mmu_idx); + if (ret != -1) { + goto found_tlb; + } + } + } + +found_tlb: + + qemu_log_mask(CPU_LOG_MMU, "%s: access %s " TARGET_FMT_lx " => " + HWADDR_FMT_plx " %d %d\n", __func__, + ret < 0 ? "refused" : "granted", address, + ret < 0 ? -1 : *raddr, ret == -1 ? 
0 : *prot, ret); + return ret; +} + +static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong address, + MMUAccessType access_type, int mmu_idx) +{ + uint32_t epid; + bool as, pr; + uint32_t missed_tid = 0; + bool use_epid = mmubooke206_get_as(env, mmu_idx, &epid, &as, &pr); + + if (access_type == MMU_INST_FETCH) { + as = FIELD_EX64(env->msr, MSR, IR); + } + env->spr[SPR_BOOKE_MAS0] = env->spr[SPR_BOOKE_MAS4] & MAS4_TLBSELD_MASK; + env->spr[SPR_BOOKE_MAS1] = env->spr[SPR_BOOKE_MAS4] & MAS4_TSIZED_MASK; + env->spr[SPR_BOOKE_MAS2] = env->spr[SPR_BOOKE_MAS4] & MAS4_WIMGED_MASK; + env->spr[SPR_BOOKE_MAS3] = 0; + env->spr[SPR_BOOKE_MAS6] = 0; + env->spr[SPR_BOOKE_MAS7] = 0; + + /* AS */ + if (as) { + env->spr[SPR_BOOKE_MAS1] |= MAS1_TS; + env->spr[SPR_BOOKE_MAS6] |= MAS6_SAS; + } + + env->spr[SPR_BOOKE_MAS1] |= MAS1_VALID; + env->spr[SPR_BOOKE_MAS2] |= address & MAS2_EPN_MASK; + + if (!use_epid) { + switch (env->spr[SPR_BOOKE_MAS4] & MAS4_TIDSELD_PIDZ) { + case MAS4_TIDSELD_PID0: + missed_tid = env->spr[SPR_BOOKE_PID]; + break; + case MAS4_TIDSELD_PID1: + missed_tid = env->spr[SPR_BOOKE_PID1]; + break; + case MAS4_TIDSELD_PID2: + missed_tid = env->spr[SPR_BOOKE_PID2]; + break; + } + env->spr[SPR_BOOKE_MAS6] |= env->spr[SPR_BOOKE_PID] << 16; + } else { + missed_tid = epid; + env->spr[SPR_BOOKE_MAS6] |= missed_tid << 16; + } + env->spr[SPR_BOOKE_MAS1] |= (missed_tid << MAS1_TID_SHIFT); + + + /* next victim logic */ + env->spr[SPR_BOOKE_MAS0] |= env->last_way << MAS0_ESEL_SHIFT; + env->last_way++; + env->last_way &= booke206_tlb_ways(env, 0) - 1; + env->spr[SPR_BOOKE_MAS0] |= env->last_way << MAS0_NV_SHIFT; +} + +bool ppc_booke_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, + hwaddr *raddrp, int *psizep, int *protp, int mmu_idx, + bool guest_visible) +{ + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + hwaddr raddr; + int prot, ret; + + if (env->mmu_model == POWERPC_MMU_BOOKE206) { + ret = mmubooke206_get_physical_address(env, &raddr, &prot, eaddr, + access_type, mmu_idx); + } else { + ret = mmubooke_get_physical_address(env, &raddr, &prot, eaddr, + access_type); + } + if (ret == 0) { + *raddrp = raddr; + *protp = prot; + *psizep = TARGET_PAGE_BITS; + return true; + } else if (!guest_visible) { + return false; + } + + log_cpu_state_mask(CPU_LOG_MMU, cs, 0); + env->error_code = 0; + switch (ret) { + case -1: + /* No matches in page tables or TLB */ + if (env->mmu_model == POWERPC_MMU_BOOKE206) { + booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx); + } + cs->exception_index = (access_type == MMU_INST_FETCH) ? + POWERPC_EXCP_ITLB : POWERPC_EXCP_DTLB; + env->spr[SPR_BOOKE_DEAR] = eaddr; + env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type); + break; + case -2: + /* Access rights violation */ + cs->exception_index = (access_type == MMU_INST_FETCH) ? 
+ POWERPC_EXCP_ISI : POWERPC_EXCP_DSI; + if (access_type != MMU_INST_FETCH) { + env->spr[SPR_BOOKE_DEAR] = eaddr; + env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type); + } + break; + case -3: + /* No execute protection violation */ + cs->exception_index = POWERPC_EXCP_ISI; + env->spr[SPR_BOOKE_ESR] = 0; + break; + } + + return false; +} diff --git a/target/ppc/mmu-booke.h b/target/ppc/mmu-booke.h new file mode 100644 index 0000000000..f972843bbb --- /dev/null +++ b/target/ppc/mmu-booke.h @@ -0,0 +1,17 @@ +#ifndef PPC_MMU_BOOKE_H +#define PPC_MMU_BOOKE_H + +#include "cpu.h" + +int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid); +int mmu40x_get_physical_address(CPUPPCState *env, hwaddr *raddr, int *prot, + target_ulong address, + MMUAccessType access_type); +hwaddr booke206_tlb_to_page_size(CPUPPCState *env, ppcmas_tlb_t *tlb); +int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, hwaddr *raddrp, + target_ulong address, uint32_t pid); +bool ppc_booke_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, + hwaddr *raddrp, int *psizep, int *protp, int mmu_idx, + bool guest_visible); + +#endif diff --git a/target/ppc/mmu-hash32.c b/target/ppc/mmu-hash32.c index 3976416840..d5f2057eb1 100644 --- a/target/ppc/mmu-hash32.c +++ b/target/ppc/mmu-hash32.c @@ -21,6 +21,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" #include "internal.h" @@ -36,57 +37,6 @@ # define LOG_BATS(...) do { } while (0) #endif -struct mmu_ctx_hash32 { - hwaddr raddr; /* Real address */ - int prot; /* Protection bits */ - int key; /* Access key */ -}; - -static int ppc_hash32_pp_prot(int key, int pp, int nx) -{ - int prot; - - if (key == 0) { - switch (pp) { - case 0x0: - case 0x1: - case 0x2: - prot = PAGE_READ | PAGE_WRITE; - break; - - case 0x3: - prot = PAGE_READ; - break; - - default: - abort(); - } - } else { - switch (pp) { - case 0x0: - prot = 0; - break; - - case 0x1: - case 0x3: - prot = PAGE_READ; - break; - - case 0x2: - prot = PAGE_READ | PAGE_WRITE; - break; - - default: - abort(); - } - } - if (nx == 0) { - prot |= PAGE_EXEC; - } - - return prot; -} - static int ppc_hash32_pte_prot(int mmu_idx, target_ulong sr, ppc_hash_pte32_t pte) { @@ -257,7 +207,7 @@ static bool ppc_hash32_direct_store(PowerPCCPU *cpu, target_ulong sr, } *prot = key ? PAGE_READ | PAGE_WRITE : PAGE_READ; - if (*prot & prot_for_access_type(access_type)) { + if (check_prot_access_type(*prot, access_type)) { *raddr = eaddr; return true; } @@ -391,7 +341,6 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, hwaddr pte_offset; ppc_hash_pte32_t pte; int prot; - int need_prot; hwaddr raddr; /* There are no hash32 large pages. */ @@ -405,13 +354,11 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, return true; } - need_prot = prot_for_access_type(access_type); - /* 2. 
Check Block Address Translation entries (BATs) */ if (env->nb_BATs != 0) { raddr = ppc_hash32_bat_lookup(cpu, eaddr, access_type, protp, mmu_idx); if (raddr != -1) { - if (need_prot & ~*protp) { + if (!check_prot_access_type(*protp, access_type)) { if (guest_visible) { if (access_type == MMU_INST_FETCH) { cs->exception_index = POWERPC_EXCP_ISI; @@ -479,7 +426,7 @@ bool ppc_hash32_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, prot = ppc_hash32_pte_prot(mmu_idx, sr, pte); - if (need_prot & ~prot) { + if (!check_prot_access_type(prot, access_type)) { /* Access right violation */ qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); if (guest_visible) { diff --git a/target/ppc/mmu-hash32.h b/target/ppc/mmu-hash32.h index 7119a63d97..f0ce6951b4 100644 --- a/target/ppc/mmu-hash32.h +++ b/target/ppc/mmu-hash32.h @@ -102,6 +102,51 @@ static inline void ppc_hash32_store_hpte1(PowerPCCPU *cpu, stl_phys(CPU(cpu)->as, base + pte_offset + HASH_PTE_SIZE_32 / 2, pte1); } +static inline int ppc_hash32_pp_prot(bool key, int pp, bool nx) +{ + int prot; + + if (key == 0) { + switch (pp) { + case 0x0: + case 0x1: + case 0x2: + prot = PAGE_READ | PAGE_WRITE; + break; + + case 0x3: + prot = PAGE_READ; + break; + + default: + abort(); + } + } else { + switch (pp) { + case 0x0: + prot = 0; + break; + + case 0x1: + case 0x3: + prot = PAGE_READ; + break; + + case 0x2: + prot = PAGE_READ | PAGE_WRITE; + break; + + default: + abort(); + } + } + if (nx == 0) { + prot |= PAGE_EXEC; + } + + return prot; +} + typedef struct { uint32_t pte0, pte1; } ppc_hash_pte32_t; diff --git a/target/ppc/mmu-hash64.c b/target/ppc/mmu-hash64.c index d645c0bb94..cbc8efa0c3 100644 --- a/target/ppc/mmu-hash64.c +++ b/target/ppc/mmu-hash64.c @@ -21,6 +21,7 @@ #include "qemu/units.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "qemu/error-report.h" #include "qemu/qemu-print.h" #include "sysemu/hw_accel.h" @@ -516,9 +517,7 @@ const ppc_hash_pte64_t *ppc_hash64_map_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - return vhc->map_hptes(cpu->vhyp, ptex, n); + return cpu->vhyp_class->map_hptes(cpu->vhyp, ptex, n); } base = ppc_hash64_hpt_base(cpu); @@ -538,9 +537,7 @@ void ppc_hash64_unmap_hptes(PowerPCCPU *cpu, const ppc_hash_pte64_t *hptes, hwaddr ptex, int n) { if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->unmap_hptes(cpu->vhyp, hptes, ptex, n); + cpu->vhyp_class->unmap_hptes(cpu->vhyp, hptes, ptex, n); return; } @@ -820,9 +817,7 @@ static void ppc_hash64_set_r(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_R; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hpte_set_r(cpu->vhyp, ptex, pte1); + cpu->vhyp_class->hpte_set_r(cpu->vhyp, ptex, pte1); return; } base = ppc_hash64_hpt_base(cpu); @@ -837,9 +832,7 @@ static void ppc_hash64_set_c(PowerPCCPU *cpu, hwaddr ptex, uint64_t pte1) hwaddr base, offset = ptex * HASH_PTE_SIZE_64 + HPTE64_DW1_C; if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc = - PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - vhc->hpte_set_c(cpu->vhyp, ptex, pte1); + cpu->vhyp_class->hpte_set_c(cpu->vhyp, ptex, pte1); return; } base = ppc_hash64_hpt_base(cpu); @@ -1096,7 +1089,7 @@ bool ppc_hash64_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, amr_prot = ppc_hash64_amr_prot(cpu, 
pte); prot = exec_prot & pp_prot & amr_prot; - need_prot = prot_for_access_type(access_type); + need_prot = check_prot_access_type(PAGE_RWX, access_type); if (need_prot & ~prot) { /* Access right violation */ qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); @@ -1187,7 +1180,7 @@ void ppc_hash64_init(PowerPCCPU *cpu) return; } - cpu->hash64_opts = g_memdup(pcc->hash64_opts, sizeof(*cpu->hash64_opts)); + cpu->hash64_opts = g_memdup2(pcc->hash64_opts, sizeof(*cpu->hash64_opts)); } void ppc_hash64_finalize(PowerPCCPU *cpu) diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c index 690dff7a49..5a02e4963b 100644 --- a/target/ppc/mmu-radix64.c +++ b/target/ppc/mmu-radix64.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "qemu/error-report.h" #include "sysemu/kvm.h" #include "kvm_ppc.h" @@ -184,7 +185,6 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, int mmu_idx, bool partition_scoped) { CPUPPCState *env = &cpu->env; - int need_prot; /* Check Page Attributes (pte58:59) */ if ((pte & R_PTE_ATT) == R_PTE_ATT_NI_IO && access_type == MMU_INST_FETCH) { @@ -209,8 +209,8 @@ static bool ppc_radix64_check_prot(PowerPCCPU *cpu, MMUAccessType access_type, } /* Check if requested access type is allowed */ - need_prot = prot_for_access_type(access_type); - if (need_prot & ~*prot) { /* Page Protected for that Access */ + if (!check_prot_access_type(*prot, access_type)) { + /* Page Protected for that Access */ *fault_cause |= access_type == MMU_INST_FETCH ? SRR1_NOEXEC_GUARD : DSISR_PROTFAULT; return true; @@ -677,9 +677,7 @@ static bool ppc_radix64_xlate_impl(PowerPCCPU *cpu, vaddr eaddr, /* Get Partition Table */ if (cpu->vhyp) { - PPCVirtualHypervisorClass *vhc; - vhc = PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp); - if (!vhc->get_pate(cpu->vhyp, cpu, lpid, &pate)) { + if (!cpu->vhyp_class->get_pate(cpu->vhyp, cpu, lpid, &pate)) { if (guest_visible) { ppc_radix64_raise_hsi(cpu, access_type, eaddr, eaddr, DSISR_R_BADCONFIG); diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h index 4c768aa5cc..c5c04a1527 100644 --- a/target/ppc/mmu-radix64.h +++ b/target/ppc/mmu-radix64.h @@ -3,6 +3,8 @@ #ifndef CONFIG_USER_ONLY +#include "exec/page-protection.h" + /* Radix Quadrants */ #define R_EADDR_MASK 0x3FFFFFFFFFFFFFFF #define R_EADDR_VALID_MASK 0xC00FFFFFFFFFFFFF diff --git a/target/ppc/mmu_common.c b/target/ppc/mmu_common.c index 751403f1c8..e2542694f0 100644 --- a/target/ppc/mmu_common.c +++ b/target/ppc/mmu_common.c @@ -25,6 +25,7 @@ #include "mmu-hash64.h" #include "mmu-hash32.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/log.h" #include "helper_regs.h" #include "qemu/error-report.h" @@ -32,9 +33,21 @@ #include "internal.h" #include "mmu-book3s-v3.h" #include "mmu-radix64.h" +#include "mmu-booke.h" /* #define DUMP_PAGE_TABLES */ +/* Context used internally during MMU translations */ +typedef struct { + hwaddr raddr; /* Real address */ + hwaddr eaddr; /* Effective address */ + int prot; /* Protection bits */ + hwaddr hash[2]; /* Pagetable hash values */ + target_ulong ptem; /* Virtual segment ID | API */ + int key; /* Access key */ + int nx; /* Non-execute area */ +} mmu_ctx_t; + void ppc_store_sdr1(CPUPPCState *env, target_ulong value) { PowerPCCPU *cpu = env_archcpu(env); @@ -64,49 +77,6 @@ void ppc_store_sdr1(CPUPPCState *env, target_ulong value) /*****************************************************************************/ /* PowerPC 
MMU emulation */ -static int pp_check(int key, int pp, int nx) -{ - int access; - - /* Compute access rights */ - access = 0; - if (key == 0) { - switch (pp) { - case 0x0: - case 0x1: - case 0x2: - access |= PAGE_WRITE; - /* fall through */ - case 0x3: - access |= PAGE_READ; - break; - } - } else { - switch (pp) { - case 0x0: - access = 0; - break; - case 0x1: - case 0x3: - access = PAGE_READ; - break; - case 0x2: - access = PAGE_READ | PAGE_WRITE; - break; - } - } - if (nx == 0) { - access |= PAGE_EXEC; - } - - return access; -} - -static int check_prot(int prot, MMUAccessType access_type) -{ - return prot & prot_for_access_type(access_type) ? 0 : -2; -} - int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, int way, int is_code) { @@ -116,8 +86,8 @@ int ppc6xx_tlb_getnum(CPUPPCState *env, target_ulong eaddr, nr = (eaddr >> TARGET_PAGE_BITS) & (env->tlb_per_way - 1); /* Select TLB way */ nr += env->tlb_per_way * way; - /* 6xx have separate TLBs for instructions and data */ - if (is_code && env->id_tlbs == 1) { + /* 6xx has separate TLBs for instructions and data */ + if (is_code) { nr += env->nb_tlb; } @@ -129,7 +99,7 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, MMUAccessType access_type) { target_ulong ptem, mmask; - int access, ret, pteh, ptev, pp; + int ret, pteh, ptev, pp; ret = -1; /* Check validity and table match */ @@ -148,18 +118,17 @@ static int ppc6xx_tlb_pte_check(mmu_ctx_t *ctx, target_ulong pte0, return -3; } } - /* Compute access rights */ - access = pp_check(ctx->key, pp, ctx->nx); /* Keep the matching PTE information */ ctx->raddr = pte1; - ctx->prot = access; - ret = check_prot(ctx->prot, access_type); - if (ret == 0) { + ctx->prot = ppc_hash32_pp_prot(ctx->key, pp, ctx->nx); + if (check_prot_access_type(ctx->prot, access_type)) { /* Access granted */ qemu_log_mask(CPU_LOG_MMU, "PTE access granted !\n"); + ret = 0; } else { /* Access right violation */ qemu_log_mask(CPU_LOG_MMU, "PTE access rejected\n"); + ret = -2; } } } @@ -224,17 +193,14 @@ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, access_type == MMU_INST_FETCH ? 
'I' : 'D'); switch (ppc6xx_tlb_pte_check(ctx, tlb->pte0, tlb->pte1, 0, access_type)) { - case -3: - /* TLB inconsistency */ - return -1; case -2: /* Access violation */ ret = -2; best = nr; break; - case -1: + case -1: /* No match */ + case -3: /* TLB inconsistency */ default: - /* No match */ break; case 0: /* access granted */ @@ -250,14 +216,34 @@ static int ppc6xx_tlb_check(CPUPPCState *env, mmu_ctx_t *ctx, } } if (best != -1) { - done: +done: qemu_log_mask(CPU_LOG_MMU, "found TLB at addr " HWADDR_FMT_plx " prot=%01x ret=%d\n", ctx->raddr & TARGET_PAGE_MASK, ctx->prot, ret); /* Update page flags */ pte_update_flags(ctx, &env->tlb.tlb6[best].pte1, ret, access_type); } - +#if defined(DUMP_PAGE_TABLES) + if (qemu_loglevel_mask(CPU_LOG_MMU)) { + CPUState *cs = env_cpu(env); + hwaddr base = ppc_hash32_hpt_base(env_archcpu(env)); + hwaddr len = ppc_hash32_hpt_mask(env_archcpu(env)) + 0x80; + uint32_t a0, a1, a2, a3; + + qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx "\n", + base, len); + for (hwaddr curaddr = base; curaddr < base + len; curaddr += 16) { + a0 = ldl_phys(cs->as, curaddr); + a1 = ldl_phys(cs->as, curaddr + 4); + a2 = ldl_phys(cs->as, curaddr + 8); + a3 = ldl_phys(cs->as, curaddr + 12); + if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) { + qemu_log(HWADDR_FMT_plx ": %08x %08x %08x %08x\n", + curaddr, a0, a1, a2, a3); + } + } + } +#endif return ret; } @@ -297,8 +283,8 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, int ret = -1; bool ifetch = access_type == MMU_INST_FETCH; - qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__, - ifetch ? 'I' : 'D', virtual); + qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT v " TARGET_FMT_lx "\n", __func__, + ifetch ? 'I' : 'D', virtual); if (ifetch) { BATlt = env->IBAT[1]; BATut = env->IBAT[0]; @@ -312,9 +298,9 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, BEPIu = *BATu & 0xF0000000; BEPIl = *BATu & 0x0FFE0000; bat_size_prot(env, &bl, &valid, &prot, BATu, BATl); - qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu " - TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__, - ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl); + qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx " BATu " + TARGET_FMT_lx " BATl " TARGET_FMT_lx "\n", __func__, + ifetch ? 'I' : 'D', i, virtual, *BATu, *BATl); if ((virtual & 0xF0000000) == BEPIu && ((virtual & 0x0FFE0000) & ~bl) == BEPIl) { /* BAT matches */ @@ -325,12 +311,14 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, (virtual & 0x0001F000); /* Compute access rights */ ctx->prot = prot; - ret = check_prot(ctx->prot, access_type); - if (ret == 0) { + if (check_prot_access_type(ctx->prot, access_type)) { qemu_log_mask(CPU_LOG_MMU, "BAT %d match: r " HWADDR_FMT_plx " prot=%c%c\n", i, ctx->raddr, ctx->prot & PAGE_READ ? 'R' : '-', ctx->prot & PAGE_WRITE ? 'W' : '-'); + ret = 0; + } else { + ret = -2; } break; } @@ -346,12 +334,11 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, BEPIu = *BATu & 0xF0000000; BEPIl = *BATu & 0x0FFE0000; bl = (*BATu & 0x00001FFC) << 15; - qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " - TARGET_FMT_lx " BATu " TARGET_FMT_lx - " BATl " TARGET_FMT_lx "\n\t" TARGET_FMT_lx " " - TARGET_FMT_lx " " TARGET_FMT_lx "\n", - __func__, ifetch ? 'I' : 'D', i, virtual, - *BATu, *BATl, BEPIu, BEPIl, bl); + qemu_log_mask(CPU_LOG_MMU, "%s: %cBAT%d v " TARGET_FMT_lx + " BATu " TARGET_FMT_lx " BATl " TARGET_FMT_lx + "\n\t" TARGET_FMT_lx " " TARGET_FMT_lx " " + TARGET_FMT_lx "\n", __func__, ifetch ? 
'I' : 'D', + i, virtual, *BATu, *BATl, BEPIu, BEPIl, bl); } } } @@ -359,19 +346,22 @@ static int get_bat_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, return ret; } -/* Perform segment based translation */ -static int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, MMUAccessType access_type, - int type) +static int mmu6xx_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, + target_ulong eaddr, + MMUAccessType access_type, int type) { PowerPCCPU *cpu = env_archcpu(env); hwaddr hash; - target_ulong vsid; + target_ulong vsid, sr, pgidx; int ds, target_page_bits; bool pr; - int ret; - target_ulong sr, pgidx; + /* First try to find a BAT entry if there are any */ + if (env->nb_BATs && get_bat_6xx_tlb(env, ctx, eaddr, access_type) == 0) { + return 0; + } + + /* Perform segment based translation when no BATs matched */ pr = FIELD_EX64(env->msr, MSR, PR); ctx->eaddr = eaddr; @@ -394,527 +384,65 @@ static int get_segment_6xx_tlb(CPUPPCState *env, mmu_ctx_t *ctx, hash = vsid ^ pgidx; ctx->ptem = (vsid << 7) | (pgidx >> 10); - qemu_log_mask(CPU_LOG_MMU, - "pte segment: key=%d ds %d nx %d vsid " TARGET_FMT_lx "\n", - ctx->key, ds, ctx->nx, vsid); - ret = -1; + qemu_log_mask(CPU_LOG_MMU, "pte segment: key=%d ds %d nx %d vsid " + TARGET_FMT_lx "\n", ctx->key, ds, ctx->nx, vsid); if (!ds) { /* Check if instruction fetch is allowed, if needed */ - if (type != ACCESS_CODE || ctx->nx == 0) { - /* Page address translation */ - qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx - " htab_mask " HWADDR_FMT_plx - " hash " HWADDR_FMT_plx "\n", - ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); - ctx->hash[0] = hash; - ctx->hash[1] = ~hash; - - /* Initialize real address with an invalid value */ - ctx->raddr = (hwaddr)-1ULL; - /* Software TLB search */ - ret = ppc6xx_tlb_check(env, ctx, eaddr, access_type); -#if defined(DUMP_PAGE_TABLES) - if (qemu_loglevel_mask(CPU_LOG_MMU)) { - CPUState *cs = env_cpu(env); - hwaddr curaddr; - uint32_t a0, a1, a2, a3; - - qemu_log("Page table: " HWADDR_FMT_plx " len " HWADDR_FMT_plx - "\n", ppc_hash32_hpt_base(cpu), - ppc_hash32_hpt_mask(cpu) + 0x80); - for (curaddr = ppc_hash32_hpt_base(cpu); - curaddr < (ppc_hash32_hpt_base(cpu) - + ppc_hash32_hpt_mask(cpu) + 0x80); - curaddr += 16) { - a0 = ldl_phys(cs->as, curaddr); - a1 = ldl_phys(cs->as, curaddr + 4); - a2 = ldl_phys(cs->as, curaddr + 8); - a3 = ldl_phys(cs->as, curaddr + 12); - if (a0 != 0 || a1 != 0 || a2 != 0 || a3 != 0) { - qemu_log(HWADDR_FMT_plx ": %08x %08x %08x %08x\n", - curaddr, a0, a1, a2, a3); - } - } - } -#endif - } else { + if (type == ACCESS_CODE && ctx->nx) { qemu_log_mask(CPU_LOG_MMU, "No access allowed\n"); - ret = -3; - } - } else { - qemu_log_mask(CPU_LOG_MMU, "direct store...\n"); - /* Direct-store segment : absolutely *BUGGY* for now */ - - switch (type) { - case ACCESS_INT: - /* Integer load/store : only access allowed */ - break; - case ACCESS_CODE: - /* No code fetch is allowed in direct-store areas */ - return -4; - case ACCESS_FLOAT: - /* Floating point load/store */ - return -4; - case ACCESS_RES: - /* lwarx, ldarx or srwcx. */ - return -4; - case ACCESS_CACHE: - /* - * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi - * - * Should make the instruction do no-op. 
As it already do - * no-op, it's quite easy :-) - */ - ctx->raddr = eaddr; - return 0; - case ACCESS_EXT: - /* eciwx or ecowx */ - return -4; - default: - qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need " - "address translation\n"); - return -4; - } - if ((access_type == MMU_DATA_STORE || ctx->key != 1) && - (access_type == MMU_DATA_LOAD || ctx->key != 0)) { - ctx->raddr = eaddr; - ret = 2; - } else { - ret = -2; - } - } - - return ret; -} - -/* Generic TLB check function for embedded PowerPC implementations */ -static bool ppcemb_tlb_check(CPUPPCState *env, ppcemb_tlb_t *tlb, - hwaddr *raddrp, - target_ulong address, uint32_t pid, int i) -{ - target_ulong mask; - - /* Check valid flag */ - if (!(tlb->prot & PAGE_VALID)) { - return false; - } - mask = ~(tlb->size - 1); - qemu_log_mask(CPU_LOG_MMU, "%s: TLB %d address " TARGET_FMT_lx - " PID %u <=> " TARGET_FMT_lx " " TARGET_FMT_lx " %u %x\n", - __func__, i, address, pid, tlb->EPN, - mask, (uint32_t)tlb->PID, tlb->prot); - /* Check PID */ - if (tlb->PID != 0 && tlb->PID != pid) { - return false; - } - /* Check effective address */ - if ((address & mask) != tlb->EPN) { - return false; - } - *raddrp = (tlb->RPN & mask) | (address & ~mask); - return true; -} - -/* Generic TLB search function for PowerPC embedded implementations */ -int ppcemb_tlb_search(CPUPPCState *env, target_ulong address, uint32_t pid) -{ - ppcemb_tlb_t *tlb; - hwaddr raddr; - int i; - - for (i = 0; i < env->nb_tlb; i++) { - tlb = &env->tlb.tlbe[i]; - if (ppcemb_tlb_check(env, tlb, &raddr, address, pid, i)) { - return i; - } - } - return -1; -} - -static int mmu40x_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong address, - MMUAccessType access_type) -{ - ppcemb_tlb_t *tlb; - hwaddr raddr; - int i, ret, zsel, zpr, pr; - - ret = -1; - raddr = (hwaddr)-1ULL; - pr = FIELD_EX64(env->msr, MSR, PR); - for (i = 0; i < env->nb_tlb; i++) { - tlb = &env->tlb.tlbe[i]; - if (!ppcemb_tlb_check(env, tlb, &raddr, address, - env->spr[SPR_40x_PID], i)) { - continue; - } - zsel = (tlb->attr >> 4) & 0xF; - zpr = (env->spr[SPR_40x_ZPR] >> (30 - (2 * zsel))) & 0x3; - qemu_log_mask(CPU_LOG_MMU, - "%s: TLB %d zsel %d zpr %d ty %d attr %08x\n", - __func__, i, zsel, zpr, access_type, tlb->attr); - /* Check execute enable bit */ - switch (zpr) { - case 0x2: - if (pr != 0) { - goto check_perms; - } - /* fall through */ - case 0x3: - /* All accesses granted */ - ctx->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - ret = 0; - break; - case 0x0: - if (pr != 0) { - /* Raise Zone protection fault. 
*/ - env->spr[SPR_40x_ESR] = 1 << 22; - ctx->prot = 0; - ret = -2; - break; - } - /* fall through */ - case 0x1: - check_perms: - /* Check from TLB entry */ - ctx->prot = tlb->prot; - ret = check_prot(ctx->prot, access_type); - if (ret == -2) { - env->spr[SPR_40x_ESR] = 0; - } - break; - } - if (ret >= 0) { - ctx->raddr = raddr; - qemu_log_mask(CPU_LOG_MMU, "%s: access granted " TARGET_FMT_lx - " => " HWADDR_FMT_plx - " %d %d\n", __func__, address, ctx->raddr, ctx->prot, - ret); - return 0; - } - } - qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx - " => " HWADDR_FMT_plx - " %d %d\n", __func__, address, raddr, ctx->prot, ret); - - return ret; -} - -static bool mmubooke_check_pid(CPUPPCState *env, ppcemb_tlb_t *tlb, - hwaddr *raddr, target_ulong addr, int i) -{ - if (ppcemb_tlb_check(env, tlb, raddr, addr, env->spr[SPR_BOOKE_PID], i)) { - if (!env->nb_pids) { - /* Extend the physical address to 36 bits */ - *raddr |= (uint64_t)(tlb->RPN & 0xF) << 32; - } - return true; - } else if (!env->nb_pids) { - return false; - } - if (env->spr[SPR_BOOKE_PID1] && - ppcemb_tlb_check(env, tlb, raddr, addr, env->spr[SPR_BOOKE_PID1], i)) { - return true; - } - if (env->spr[SPR_BOOKE_PID2] && - ppcemb_tlb_check(env, tlb, raddr, addr, env->spr[SPR_BOOKE_PID2], i)) { - return true; - } - return false; -} - -static int mmubooke_check_tlb(CPUPPCState *env, ppcemb_tlb_t *tlb, - hwaddr *raddr, int *prot, target_ulong address, - MMUAccessType access_type, int i) -{ - int prot2; - - if (!mmubooke_check_pid(env, tlb, raddr, address, i)) { - qemu_log_mask(CPU_LOG_MMU, "%s: TLB entry not found\n", __func__); - return -1; - } - - if (FIELD_EX64(env->msr, MSR, PR)) { - prot2 = tlb->prot & 0xF; - } else { - prot2 = (tlb->prot >> 4) & 0xF; - } - - /* Check the address space */ - if ((access_type == MMU_INST_FETCH ? - FIELD_EX64(env->msr, MSR, IR) : - FIELD_EX64(env->msr, MSR, DR)) != (tlb->attr & 1)) { - qemu_log_mask(CPU_LOG_MMU, "%s: AS doesn't match\n", __func__); - return -1; - } - - *prot = prot2; - if (prot2 & prot_for_access_type(access_type)) { - qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__); - return 0; - } - - qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, prot2); - return access_type == MMU_INST_FETCH ? 
-3 : -2; -} - -static int mmubooke_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong address, - MMUAccessType access_type) -{ - ppcemb_tlb_t *tlb; - hwaddr raddr; - int i, ret; - - ret = -1; - raddr = (hwaddr)-1ULL; - for (i = 0; i < env->nb_tlb; i++) { - tlb = &env->tlb.tlbe[i]; - ret = mmubooke_check_tlb(env, tlb, &raddr, &ctx->prot, address, - access_type, i); - if (ret != -1) { - break; - } - } - - if (ret >= 0) { - ctx->raddr = raddr; - qemu_log_mask(CPU_LOG_MMU, "%s: access granted " TARGET_FMT_lx - " => " HWADDR_FMT_plx " %d %d\n", __func__, - address, ctx->raddr, ctx->prot, ret); - } else { - qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx - " => " HWADDR_FMT_plx " %d %d\n", __func__, - address, raddr, ctx->prot, ret); - } - - return ret; -} - -hwaddr booke206_tlb_to_page_size(CPUPPCState *env, ppcmas_tlb_t *tlb) -{ - int tlbm_size; - - tlbm_size = (tlb->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; - - return 1024ULL << tlbm_size; -} - -/* TLB check function for MAS based SoftTLBs */ -int ppcmas_tlb_check(CPUPPCState *env, ppcmas_tlb_t *tlb, hwaddr *raddrp, - target_ulong address, uint32_t pid) -{ - hwaddr mask; - uint32_t tlb_pid; - - if (!FIELD_EX64(env->msr, MSR, CM)) { - /* In 32bit mode we can only address 32bit EAs */ - address = (uint32_t)address; - } - - /* Check valid flag */ - if (!(tlb->mas1 & MAS1_VALID)) { - return -1; - } - - mask = ~(booke206_tlb_to_page_size(env, tlb) - 1); - qemu_log_mask(CPU_LOG_MMU, "%s: TLB ADDR=0x" TARGET_FMT_lx - " PID=0x%x MAS1=0x%x MAS2=0x%" PRIx64 " mask=0x%" - HWADDR_PRIx " MAS7_3=0x%" PRIx64 " MAS8=0x%" PRIx32 "\n", - __func__, address, pid, tlb->mas1, tlb->mas2, mask, - tlb->mas7_3, tlb->mas8); - - /* Check PID */ - tlb_pid = (tlb->mas1 & MAS1_TID_MASK) >> MAS1_TID_SHIFT; - if (tlb_pid != 0 && tlb_pid != pid) { - return -1; - } - - /* Check effective address */ - if ((address & mask) != (tlb->mas2 & MAS2_EPN_MASK)) { - return -1; - } - - if (raddrp) { - *raddrp = (tlb->mas7_3 & mask) | (address & ~mask); - } - - return 0; -} - -static bool is_epid_mmu(int mmu_idx) -{ - return mmu_idx == PPC_TLB_EPID_STORE || mmu_idx == PPC_TLB_EPID_LOAD; -} - -static uint32_t mmubooke206_esr(int mmu_idx, MMUAccessType access_type) -{ - uint32_t esr = 0; - if (access_type == MMU_DATA_STORE) { - esr |= ESR_ST; - } - if (is_epid_mmu(mmu_idx)) { - esr |= ESR_EPID; - } - return esr; -} - -/* - * Get EPID register given the mmu_idx. If this is regular load, - * construct the EPID access bits from current processor state - * - * Get the effective AS and PR bits and the PID. The PID is returned - * only if EPID load is requested, otherwise the caller must detect - * the correct EPID. Return true if valid EPID is returned. 
- */ -static bool mmubooke206_get_as(CPUPPCState *env, - int mmu_idx, uint32_t *epid_out, - bool *as_out, bool *pr_out) -{ - if (is_epid_mmu(mmu_idx)) { - uint32_t epidr; - if (mmu_idx == PPC_TLB_EPID_STORE) { - epidr = env->spr[SPR_BOOKE_EPSC]; - } else { - epidr = env->spr[SPR_BOOKE_EPLC]; - } - *epid_out = (epidr & EPID_EPID) >> EPID_EPID_SHIFT; - *as_out = !!(epidr & EPID_EAS); - *pr_out = !!(epidr & EPID_EPR); - return true; - } else { - *as_out = FIELD_EX64(env->msr, MSR, DS); - *pr_out = FIELD_EX64(env->msr, MSR, PR); - return false; - } -} - -/* Check if the tlb found by hashing really matches */ -static int mmubooke206_check_tlb(CPUPPCState *env, ppcmas_tlb_t *tlb, - hwaddr *raddr, int *prot, - target_ulong address, - MMUAccessType access_type, int mmu_idx) -{ - int prot2 = 0; - uint32_t epid; - bool as, pr; - bool use_epid = mmubooke206_get_as(env, mmu_idx, &epid, &as, &pr); - - if (!use_epid) { - if (ppcmas_tlb_check(env, tlb, raddr, address, - env->spr[SPR_BOOKE_PID]) >= 0) { - goto found_tlb; + return -3; } + /* Page address translation */ + qemu_log_mask(CPU_LOG_MMU, "htab_base " HWADDR_FMT_plx " htab_mask " + HWADDR_FMT_plx " hash " HWADDR_FMT_plx "\n", + ppc_hash32_hpt_base(cpu), ppc_hash32_hpt_mask(cpu), hash); + ctx->hash[0] = hash; + ctx->hash[1] = ~hash; - if (env->spr[SPR_BOOKE_PID1] && - ppcmas_tlb_check(env, tlb, raddr, address, - env->spr[SPR_BOOKE_PID1]) >= 0) { - goto found_tlb; - } - - if (env->spr[SPR_BOOKE_PID2] && - ppcmas_tlb_check(env, tlb, raddr, address, - env->spr[SPR_BOOKE_PID2]) >= 0) { - goto found_tlb; - } - } else { - if (ppcmas_tlb_check(env, tlb, raddr, address, epid) >= 0) { - goto found_tlb; - } + /* Initialize real address with an invalid value */ + ctx->raddr = (hwaddr)-1ULL; + /* Software TLB search */ + return ppc6xx_tlb_check(env, ctx, eaddr, access_type); } - qemu_log_mask(CPU_LOG_MMU, "%s: No TLB entry found for effective address " - "0x" TARGET_FMT_lx "\n", __func__, address); - return -1; - -found_tlb: - - if (pr) { - if (tlb->mas7_3 & MAS3_UR) { - prot2 |= PAGE_READ; - } - if (tlb->mas7_3 & MAS3_UW) { - prot2 |= PAGE_WRITE; - } - if (tlb->mas7_3 & MAS3_UX) { - prot2 |= PAGE_EXEC; - } - } else { - if (tlb->mas7_3 & MAS3_SR) { - prot2 |= PAGE_READ; - } - if (tlb->mas7_3 & MAS3_SW) { - prot2 |= PAGE_WRITE; - } - if (tlb->mas7_3 & MAS3_SX) { - prot2 |= PAGE_EXEC; - } - } - - /* Check the address space and permissions */ - if (access_type == MMU_INST_FETCH) { - /* There is no way to fetch code using epid load */ - assert(!use_epid); - as = FIELD_EX64(env->msr, MSR, IR); - } - - if (as != ((tlb->mas1 & MAS1_TS) >> MAS1_TS_SHIFT)) { - qemu_log_mask(CPU_LOG_MMU, "%s: AS doesn't match\n", __func__); - return -1; - } - - *prot = prot2; - if (prot2 & prot_for_access_type(access_type)) { - qemu_log_mask(CPU_LOG_MMU, "%s: good TLB!\n", __func__); + /* Direct-store segment : absolutely *BUGGY* for now */ + qemu_log_mask(CPU_LOG_MMU, "direct store...\n"); + switch (type) { + case ACCESS_INT: + /* Integer load/store : only access allowed */ + break; + case ACCESS_CODE: + /* No code fetch is allowed in direct-store areas */ + return -4; + case ACCESS_FLOAT: + /* Floating point load/store */ + return -4; + case ACCESS_RES: + /* lwarx, ldarx or srwcx. */ + return -4; + case ACCESS_CACHE: + /* + * dcba, dcbt, dcbtst, dcbf, dcbi, dcbst, dcbz, or icbi + * + * Should make the instruction do no-op. 
As it already do + * no-op, it's quite easy :-) + */ + ctx->raddr = eaddr; return 0; + case ACCESS_EXT: + /* eciwx or ecowx */ + return -4; + default: + qemu_log_mask(CPU_LOG_MMU, "ERROR: instruction should not need address" + " translation\n"); + return -4; } - - qemu_log_mask(CPU_LOG_MMU, "%s: no prot match: %x\n", __func__, prot2); - return access_type == MMU_INST_FETCH ? -3 : -2; -} - -static int mmubooke206_get_physical_address(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong address, - MMUAccessType access_type, - int mmu_idx) -{ - ppcmas_tlb_t *tlb; - hwaddr raddr; - int i, j, ret; - - ret = -1; - raddr = (hwaddr)-1ULL; - - for (i = 0; i < BOOKE206_MAX_TLBN; i++) { - int ways = booke206_tlb_ways(env, i); - - for (j = 0; j < ways; j++) { - tlb = booke206_get_tlbm(env, i, address, j); - if (!tlb) { - continue; - } - ret = mmubooke206_check_tlb(env, tlb, &raddr, &ctx->prot, address, - access_type, mmu_idx); - if (ret != -1) { - goto found_tlb; - } - } - } - -found_tlb: - - if (ret >= 0) { - ctx->raddr = raddr; - qemu_log_mask(CPU_LOG_MMU, "%s: access granted " TARGET_FMT_lx - " => " HWADDR_FMT_plx " %d %d\n", __func__, address, - ctx->raddr, ctx->prot, ret); - } else { - qemu_log_mask(CPU_LOG_MMU, "%s: access refused " TARGET_FMT_lx - " => " HWADDR_FMT_plx " %d %d\n", __func__, address, - raddr, ctx->prot, ret); + if ((access_type == MMU_DATA_STORE || ctx->key != 1) && + (access_type == MMU_DATA_LOAD || ctx->key != 0)) { + ctx->raddr = eaddr; + return 2; } - - return ret; + return -2; } static const char *book3e_tsize_to_str[32] = { @@ -1103,13 +631,7 @@ static void mmu6xx_dump_mmu(CPUPPCState *env) mmu6xx_dump_BATs(env, ACCESS_INT); mmu6xx_dump_BATs(env, ACCESS_CODE); - if (env->id_tlbs != 1) { - qemu_printf("ERROR: 6xx MMU should have separated TLB" - " for code and data\n"); - } - qemu_printf("\nTLBs [EPN EPN + SIZE]\n"); - for (type = 0; type < 2; type++) { for (way = 0; way < env->nb_ways; way++) { for (entry = env->nb_tlb * type + env->tlb_per_way * way; @@ -1161,147 +683,97 @@ void dump_mmu(CPUPPCState *env) } } -static int check_physical(CPUPPCState *env, mmu_ctx_t *ctx, target_ulong eaddr, - MMUAccessType access_type) -{ - ctx->raddr = eaddr; - ctx->prot = PAGE_READ | PAGE_EXEC; - - switch (env->mmu_model) { - case POWERPC_MMU_SOFT_6xx: - case POWERPC_MMU_SOFT_4xx: - case POWERPC_MMU_REAL: - case POWERPC_MMU_BOOKE: - ctx->prot |= PAGE_WRITE; - break; - - default: - /* Caller's checks mean we should never get here for other models */ - g_assert_not_reached(); - } - - return 0; -} -int get_physical_address_wtlb(CPUPPCState *env, mmu_ctx_t *ctx, - target_ulong eaddr, - MMUAccessType access_type, int type, - int mmu_idx) +static bool ppc_real_mode_xlate(PowerPCCPU *cpu, vaddr eaddr, + MMUAccessType access_type, + hwaddr *raddrp, int *psizep, int *protp) { - int ret = -1; - bool real_mode = (type == ACCESS_CODE && !FIELD_EX64(env->msr, MSR, IR)) || - (type != ACCESS_CODE && !FIELD_EX64(env->msr, MSR, DR)); - - switch (env->mmu_model) { - case POWERPC_MMU_SOFT_6xx: - if (real_mode) { - ret = check_physical(env, ctx, eaddr, access_type); - } else { - /* Try to find a BAT */ - if (env->nb_BATs != 0) { - ret = get_bat_6xx_tlb(env, ctx, eaddr, access_type); - } - if (ret < 0) { - /* We didn't match any BAT entry or don't have BATs */ - ret = get_segment_6xx_tlb(env, ctx, eaddr, access_type, type); - } - } - break; + CPUPPCState *env = &cpu->env; - case POWERPC_MMU_SOFT_4xx: - if (real_mode) { - ret = check_physical(env, ctx, eaddr, access_type); - } else { - ret = 
mmu40x_get_physical_address(env, ctx, eaddr, access_type); - } - break; - case POWERPC_MMU_BOOKE: - ret = mmubooke_get_physical_address(env, ctx, eaddr, access_type); - break; - case POWERPC_MMU_BOOKE206: - ret = mmubooke206_get_physical_address(env, ctx, eaddr, access_type, - mmu_idx); - break; - case POWERPC_MMU_MPC8xx: - /* XXX: TODO */ - cpu_abort(env_cpu(env), "MPC8xx MMU model is not implemented\n"); - break; - case POWERPC_MMU_REAL: - if (real_mode) { - ret = check_physical(env, ctx, eaddr, access_type); - } else { - cpu_abort(env_cpu(env), - "PowerPC in real mode do not do any translation\n"); - } - return -1; - default: - cpu_abort(env_cpu(env), "Unknown or invalid MMU model\n"); - return -1; + if (access_type == MMU_INST_FETCH ? !FIELD_EX64(env->msr, MSR, IR) + : !FIELD_EX64(env->msr, MSR, DR)) { + *raddrp = eaddr; + *protp = PAGE_RWX; + *psizep = TARGET_PAGE_BITS; + return true; + } else if (env->mmu_model == POWERPC_MMU_REAL) { + cpu_abort(CPU(cpu), "PowerPC in real mode shold not do translation\n"); } - - return ret; + return false; } -static void booke206_update_mas_tlb_miss(CPUPPCState *env, target_ulong address, - MMUAccessType access_type, int mmu_idx) +static bool ppc_40x_xlate(PowerPCCPU *cpu, vaddr eaddr, + MMUAccessType access_type, + hwaddr *raddrp, int *psizep, int *protp, + int mmu_idx, bool guest_visible) { - uint32_t epid; - bool as, pr; - uint32_t missed_tid = 0; - bool use_epid = mmubooke206_get_as(env, mmu_idx, &epid, &as, &pr); + CPUState *cs = CPU(cpu); + CPUPPCState *env = &cpu->env; + int ret; - if (access_type == MMU_INST_FETCH) { - as = FIELD_EX64(env->msr, MSR, IR); - } - env->spr[SPR_BOOKE_MAS0] = env->spr[SPR_BOOKE_MAS4] & MAS4_TLBSELD_MASK; - env->spr[SPR_BOOKE_MAS1] = env->spr[SPR_BOOKE_MAS4] & MAS4_TSIZED_MASK; - env->spr[SPR_BOOKE_MAS2] = env->spr[SPR_BOOKE_MAS4] & MAS4_WIMGED_MASK; - env->spr[SPR_BOOKE_MAS3] = 0; - env->spr[SPR_BOOKE_MAS6] = 0; - env->spr[SPR_BOOKE_MAS7] = 0; - - /* AS */ - if (as) { - env->spr[SPR_BOOKE_MAS1] |= MAS1_TS; - env->spr[SPR_BOOKE_MAS6] |= MAS6_SAS; + if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) { + return true; } - env->spr[SPR_BOOKE_MAS1] |= MAS1_VALID; - env->spr[SPR_BOOKE_MAS2] |= address & MAS2_EPN_MASK; + ret = mmu40x_get_physical_address(env, raddrp, protp, eaddr, access_type); + if (ret == 0) { + *psizep = TARGET_PAGE_BITS; + return true; + } else if (!guest_visible) { + return false; + } - if (!use_epid) { - switch (env->spr[SPR_BOOKE_MAS4] & MAS4_TIDSELD_PIDZ) { - case MAS4_TIDSELD_PID0: - missed_tid = env->spr[SPR_BOOKE_PID]; - break; - case MAS4_TIDSELD_PID1: - missed_tid = env->spr[SPR_BOOKE_PID1]; + log_cpu_state_mask(CPU_LOG_MMU, cs, 0); + if (access_type == MMU_INST_FETCH) { + switch (ret) { + case -1: + /* No matches in page tables or TLB */ + cs->exception_index = POWERPC_EXCP_ITLB; + env->error_code = 0; + env->spr[SPR_40x_DEAR] = eaddr; + env->spr[SPR_40x_ESR] = 0x00000000; break; - case MAS4_TIDSELD_PID2: - missed_tid = env->spr[SPR_BOOKE_PID2]; + case -2: + /* Access rights violation */ + cs->exception_index = POWERPC_EXCP_ISI; + env->error_code = 0x08000000; break; + default: + g_assert_not_reached(); } - env->spr[SPR_BOOKE_MAS6] |= env->spr[SPR_BOOKE_PID] << 16; } else { - missed_tid = epid; - env->spr[SPR_BOOKE_MAS6] |= missed_tid << 16; + switch (ret) { + case -1: + /* No matches in page tables or TLB */ + cs->exception_index = POWERPC_EXCP_DTLB; + env->error_code = 0; + env->spr[SPR_40x_DEAR] = eaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_40x_ESR] = 
0x00800000; + } else { + env->spr[SPR_40x_ESR] = 0x00000000; + } + break; + case -2: + /* Access rights violation */ + cs->exception_index = POWERPC_EXCP_DSI; + env->error_code = 0; + env->spr[SPR_40x_DEAR] = eaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_40x_ESR] |= 0x00800000; + } + break; + default: + g_assert_not_reached(); + } } - env->spr[SPR_BOOKE_MAS1] |= (missed_tid << MAS1_TID_SHIFT); - - - /* next victim logic */ - env->spr[SPR_BOOKE_MAS0] |= env->last_way << MAS0_ESEL_SHIFT; - env->last_way++; - env->last_way &= booke206_tlb_ways(env, 0) - 1; - env->spr[SPR_BOOKE_MAS0] |= env->last_way << MAS0_NV_SHIFT; + return false; } -/* Perform address translation */ -/* TODO: Split this by mmu_model. */ -static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr, - MMUAccessType access_type, - hwaddr *raddrp, int *psizep, int *protp, - int mmu_idx, bool guest_visible) +static bool ppc_6xx_xlate(PowerPCCPU *cpu, vaddr eaddr, + MMUAccessType access_type, + hwaddr *raddrp, int *psizep, int *protp, + int mmu_idx, bool guest_visible) { CPUState *cs = CPU(cpu); CPUPPCState *env = &cpu->env; @@ -1309,6 +781,10 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr, int type; int ret; + if (ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, protp)) { + return true; + } + if (access_type == MMU_INST_FETCH) { /* code access */ type = ACCESS_CODE; @@ -1319,199 +795,116 @@ static bool ppc_jumbo_xlate(PowerPCCPU *cpu, vaddr eaddr, type = ACCESS_INT; } - ret = get_physical_address_wtlb(env, &ctx, eaddr, access_type, - type, mmu_idx); + ctx.prot = 0; + ctx.hash[0] = 0; + ctx.hash[1] = 0; + ret = mmu6xx_get_physical_address(env, &ctx, eaddr, access_type, type); if (ret == 0) { *raddrp = ctx.raddr; *protp = ctx.prot; *psizep = TARGET_PAGE_BITS; return true; + } else if (!guest_visible) { + return false; } - if (guest_visible) { - log_cpu_state_mask(CPU_LOG_MMU, cs, 0); - if (type == ACCESS_CODE) { - switch (ret) { - case -1: - /* No matches in page tables or TLB */ - switch (env->mmu_model) { - case POWERPC_MMU_SOFT_6xx: - cs->exception_index = POWERPC_EXCP_IFTLB; - env->error_code = 1 << 18; - env->spr[SPR_IMISS] = eaddr; - env->spr[SPR_ICMP] = 0x80000000 | ctx.ptem; - goto tlb_miss; - case POWERPC_MMU_SOFT_4xx: - cs->exception_index = POWERPC_EXCP_ITLB; - env->error_code = 0; - env->spr[SPR_40x_DEAR] = eaddr; - env->spr[SPR_40x_ESR] = 0x00000000; - break; - case POWERPC_MMU_BOOKE206: - booke206_update_mas_tlb_miss(env, eaddr, 2, mmu_idx); - /* fall through */ - case POWERPC_MMU_BOOKE: - cs->exception_index = POWERPC_EXCP_ITLB; - env->error_code = 0; - env->spr[SPR_BOOKE_DEAR] = eaddr; - env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, MMU_DATA_LOAD); - break; - case POWERPC_MMU_MPC8xx: - cpu_abort(cs, "MPC8xx MMU model is not implemented\n"); - case POWERPC_MMU_REAL: - cpu_abort(cs, "PowerPC in real mode should never raise " - "any MMU exceptions\n"); - default: - cpu_abort(cs, "Unknown or invalid MMU model\n"); - } - break; - case -2: - /* Access rights violation */ - cs->exception_index = POWERPC_EXCP_ISI; - if ((env->mmu_model == POWERPC_MMU_BOOKE) || - (env->mmu_model == POWERPC_MMU_BOOKE206)) { - env->error_code = 0; - } else { - env->error_code = 0x08000000; - } - break; - case -3: - /* No execute protection violation */ - if ((env->mmu_model == POWERPC_MMU_BOOKE) || - (env->mmu_model == POWERPC_MMU_BOOKE206)) { - env->spr[SPR_BOOKE_ESR] = 0x00000000; - env->error_code = 0; - } else { - env->error_code = 0x10000000; - } - cs->exception_index = POWERPC_EXCP_ISI; + 
log_cpu_state_mask(CPU_LOG_MMU, cs, 0); + if (type == ACCESS_CODE) { + switch (ret) { + case -1: + /* No matches in page tables or TLB */ + cs->exception_index = POWERPC_EXCP_IFTLB; + env->error_code = 1 << 18; + env->spr[SPR_IMISS] = eaddr; + env->spr[SPR_ICMP] = 0x80000000 | ctx.ptem; + goto tlb_miss; + case -2: + /* Access rights violation */ + cs->exception_index = POWERPC_EXCP_ISI; + env->error_code = 0x08000000; + break; + case -3: + /* No execute protection violation */ + cs->exception_index = POWERPC_EXCP_ISI; + env->error_code = 0x10000000; + break; + case -4: + /* Direct store exception */ + /* No code fetch is allowed in direct-store areas */ + cs->exception_index = POWERPC_EXCP_ISI; + env->error_code = 0x10000000; + break; + } + } else { + switch (ret) { + case -1: + /* No matches in page tables or TLB */ + if (access_type == MMU_DATA_STORE) { + cs->exception_index = POWERPC_EXCP_DSTLB; + env->error_code = 1 << 16; + } else { + cs->exception_index = POWERPC_EXCP_DLTLB; + env->error_code = 0; + } + env->spr[SPR_DMISS] = eaddr; + env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem; +tlb_miss: + env->error_code |= ctx.key << 19; + env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + + get_pteg_offset32(cpu, ctx.hash[0]); + env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) + + get_pteg_offset32(cpu, ctx.hash[1]); + break; + case -2: + /* Access rights violation */ + cs->exception_index = POWERPC_EXCP_DSI; + env->error_code = 0; + env->spr[SPR_DAR] = eaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_DSISR] = 0x0A000000; + } else { + env->spr[SPR_DSISR] = 0x08000000; + } + break; + case -4: + /* Direct store exception */ + switch (type) { + case ACCESS_FLOAT: + /* Floating point load/store */ + cs->exception_index = POWERPC_EXCP_ALIGN; + env->error_code = POWERPC_EXCP_ALIGN_FP; + env->spr[SPR_DAR] = eaddr; break; - case -4: - /* Direct store exception */ - /* No code fetch is allowed in direct-store areas */ - cs->exception_index = POWERPC_EXCP_ISI; - if ((env->mmu_model == POWERPC_MMU_BOOKE) || - (env->mmu_model == POWERPC_MMU_BOOKE206)) { - env->error_code = 0; + case ACCESS_RES: + /* lwarx, ldarx or stwcx. 
*/ + cs->exception_index = POWERPC_EXCP_DSI; + env->error_code = 0; + env->spr[SPR_DAR] = eaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_DSISR] = 0x06000000; } else { - env->error_code = 0x10000000; + env->spr[SPR_DSISR] = 0x04000000; } break; - } - } else { - switch (ret) { - case -1: - /* No matches in page tables or TLB */ - switch (env->mmu_model) { - case POWERPC_MMU_SOFT_6xx: - if (access_type == MMU_DATA_STORE) { - cs->exception_index = POWERPC_EXCP_DSTLB; - env->error_code = 1 << 16; - } else { - cs->exception_index = POWERPC_EXCP_DLTLB; - env->error_code = 0; - } - env->spr[SPR_DMISS] = eaddr; - env->spr[SPR_DCMP] = 0x80000000 | ctx.ptem; - tlb_miss: - env->error_code |= ctx.key << 19; - env->spr[SPR_HASH1] = ppc_hash32_hpt_base(cpu) + - get_pteg_offset32(cpu, ctx.hash[0]); - env->spr[SPR_HASH2] = ppc_hash32_hpt_base(cpu) + - get_pteg_offset32(cpu, ctx.hash[1]); - break; - case POWERPC_MMU_SOFT_4xx: - cs->exception_index = POWERPC_EXCP_DTLB; - env->error_code = 0; - env->spr[SPR_40x_DEAR] = eaddr; - if (access_type == MMU_DATA_STORE) { - env->spr[SPR_40x_ESR] = 0x00800000; - } else { - env->spr[SPR_40x_ESR] = 0x00000000; - } - break; - case POWERPC_MMU_MPC8xx: - /* XXX: TODO */ - cpu_abort(cs, "MPC8xx MMU model is not implemented\n"); - case POWERPC_MMU_BOOKE206: - booke206_update_mas_tlb_miss(env, eaddr, access_type, mmu_idx); - /* fall through */ - case POWERPC_MMU_BOOKE: - cs->exception_index = POWERPC_EXCP_DTLB; - env->error_code = 0; - env->spr[SPR_BOOKE_DEAR] = eaddr; - env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type); - break; - case POWERPC_MMU_REAL: - cpu_abort(cs, "PowerPC in real mode should never raise " - "any MMU exceptions\n"); - default: - cpu_abort(cs, "Unknown or invalid MMU model\n"); - } - break; - case -2: - /* Access rights violation */ + case ACCESS_EXT: + /* eciwx or ecowx */ cs->exception_index = POWERPC_EXCP_DSI; env->error_code = 0; - if (env->mmu_model == POWERPC_MMU_SOFT_4xx) { - env->spr[SPR_40x_DEAR] = eaddr; - if (access_type == MMU_DATA_STORE) { - env->spr[SPR_40x_ESR] |= 0x00800000; - } - } else if ((env->mmu_model == POWERPC_MMU_BOOKE) || - (env->mmu_model == POWERPC_MMU_BOOKE206)) { - env->spr[SPR_BOOKE_DEAR] = eaddr; - env->spr[SPR_BOOKE_ESR] = mmubooke206_esr(mmu_idx, access_type); + env->spr[SPR_DAR] = eaddr; + if (access_type == MMU_DATA_STORE) { + env->spr[SPR_DSISR] = 0x06100000; } else { - env->spr[SPR_DAR] = eaddr; - if (access_type == MMU_DATA_STORE) { - env->spr[SPR_DSISR] = 0x0A000000; - } else { - env->spr[SPR_DSISR] = 0x08000000; - } + env->spr[SPR_DSISR] = 0x04100000; } break; - case -4: - /* Direct store exception */ - switch (type) { - case ACCESS_FLOAT: - /* Floating point load/store */ - cs->exception_index = POWERPC_EXCP_ALIGN; - env->error_code = POWERPC_EXCP_ALIGN_FP; - env->spr[SPR_DAR] = eaddr; - break; - case ACCESS_RES: - /* lwarx, ldarx or stwcx. 
*/ - cs->exception_index = POWERPC_EXCP_DSI; - env->error_code = 0; - env->spr[SPR_DAR] = eaddr; - if (access_type == MMU_DATA_STORE) { - env->spr[SPR_DSISR] = 0x06000000; - } else { - env->spr[SPR_DSISR] = 0x04000000; - } - break; - case ACCESS_EXT: - /* eciwx or ecowx */ - cs->exception_index = POWERPC_EXCP_DSI; - env->error_code = 0; - env->spr[SPR_DAR] = eaddr; - if (access_type == MMU_DATA_STORE) { - env->spr[SPR_DSISR] = 0x06100000; - } else { - env->spr[SPR_DSISR] = 0x04100000; - } - break; - default: - printf("DSI: invalid exception (%d)\n", ret); - cs->exception_index = POWERPC_EXCP_PROGRAM; - env->error_code = - POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL; - env->spr[SPR_DAR] = eaddr; - break; - } + default: + printf("DSI: invalid exception (%d)\n", ret); + cs->exception_index = POWERPC_EXCP_PROGRAM; + env->error_code = POWERPC_EXCP_INVAL | POWERPC_EXCP_INVAL_INVAL; + env->spr[SPR_DAR] = eaddr; break; } + break; } } return false; @@ -1542,10 +935,23 @@ bool ppc_xlate(PowerPCCPU *cpu, vaddr eaddr, MMUAccessType access_type, case POWERPC_MMU_32B: return ppc_hash32_xlate(cpu, eaddr, access_type, raddrp, psizep, protp, mmu_idx, guest_visible); - - default: - return ppc_jumbo_xlate(cpu, eaddr, access_type, raddrp, + case POWERPC_MMU_BOOKE: + case POWERPC_MMU_BOOKE206: + return ppc_booke_xlate(cpu, eaddr, access_type, raddrp, psizep, protp, mmu_idx, guest_visible); + case POWERPC_MMU_SOFT_4xx: + return ppc_40x_xlate(cpu, eaddr, access_type, raddrp, + psizep, protp, mmu_idx, guest_visible); + case POWERPC_MMU_SOFT_6xx: + return ppc_6xx_xlate(cpu, eaddr, access_type, raddrp, + psizep, protp, mmu_idx, guest_visible); + case POWERPC_MMU_REAL: + return ppc_real_mode_xlate(cpu, eaddr, access_type, raddrp, psizep, + protp); + case POWERPC_MMU_MPC8xx: + cpu_abort(env_cpu(&cpu->env), "MPC8xx MMU model is not implemented\n"); + default: + cpu_abort(CPU(cpu), "Unknown or invalid MMU model\n"); } } diff --git a/target/ppc/mmu_helper.c b/target/ppc/mmu_helper.c index c071b4d5e2..b0a0676beb 100644 --- a/target/ppc/mmu_helper.c +++ b/target/ppc/mmu_helper.c @@ -25,6 +25,7 @@ #include "mmu-hash64.h" #include "mmu-hash32.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/log.h" #include "helper_regs.h" #include "qemu/error-report.h" @@ -32,6 +33,7 @@ #include "internal.h" #include "mmu-book3s-v3.h" #include "mmu-radix64.h" +#include "mmu-booke.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" @@ -44,14 +46,8 @@ static inline void ppc6xx_tlb_invalidate_all(CPUPPCState *env) { ppc6xx_tlb_t *tlb; - int nr, max; + int nr, max = 2 * env->nb_tlb; - /* LOG_SWTLB("Invalidate all TLBs\n"); */ - /* Invalidate all defined software TLB */ - max = env->nb_tlb; - if (env->id_tlbs == 1) { - max *= 2; - } for (nr = 0; nr < max; nr++) { tlb = &env->tlb.tlb6[nr]; pte_invalidate(&tlb->pte0); @@ -307,9 +303,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr) switch (env->mmu_model) { case POWERPC_MMU_SOFT_6xx: ppc6xx_tlb_invalidate_virt(env, addr, 0); - if (env->id_tlbs == 1) { - ppc6xx_tlb_invalidate_virt(env, addr, 1); - } + ppc6xx_tlb_invalidate_virt(env, addr, 1); break; case POWERPC_MMU_32B: /* @@ -533,7 +527,7 @@ void helper_tlbie_isa300(CPUPPCState *env, target_ulong rb, target_ulong rs, if (local) { tlb_flush_page(env_cpu(env), addr); } else { - tlb_flush_page_all_cpus(env_cpu(env), addr); + tlb_flush_page_all_cpus_synced(env_cpu(env), addr); } return; @@ -595,30 +589,6 @@ void helper_6xx_tlbi(CPUPPCState *env, target_ulong EPN) do_6xx_tlb(env, EPN, 1); } 
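The mmu_helper.c hunks above drop the old id_tlbs special cases: the 6xx software TLB is now always treated as split data/instruction arrays stored back to back, which is why ppc6xx_tlb_invalidate_all() walks 2 * env->nb_tlb entries and ppc_tlb_invalidate_one() flushes both halves unconditionally. A minimal sketch of the indexing this layout relies on, mirroring the ppc6xx_tlb_getnum() hunk from mmu_common.c earlier in this series (the standalone helper and its name are illustrative only, not part of the patch):

/* Sketch: data entries occupy [0, nb_tlb), instruction entries occupy
 * [nb_tlb, 2 * nb_tlb), so a full invalidation must visit 2 * nb_tlb slots. */
static int tlb6_entry_index(CPUPPCState *env, target_ulong eaddr,
                            int way, bool is_code)
{
    int nr = (eaddr >> TARGET_PAGE_BITS) & (env->tlb_per_way - 1);

    nr += env->tlb_per_way * way;   /* select the TLB way */
    if (is_code) {
        nr += env->nb_tlb;          /* instruction half of the array */
    }
    return nr;
}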
-/*****************************************************************************/ -/* PowerPC 601 specific instructions (POWER bridge) */ - -target_ulong helper_rac(CPUPPCState *env, target_ulong addr) -{ - mmu_ctx_t ctx; - int nb_BATs; - target_ulong ret = 0; - - /* - * We don't have to generate many instances of this instruction, - * as rac is supervisor only. - * - * XXX: FIX THIS: Pretend we have no BAT - */ - nb_BATs = env->nb_BATs; - env->nb_BATs = 0; - if (get_physical_address_wtlb(env, &ctx, addr, 0, ACCESS_INT, 0) == 0) { - ret = ctx.raddr; - } - env->nb_BATs = nb_BATs; - return ret; -} - static inline target_ulong booke_tlb_to_page_size(int size) { return 1024 << (2 * size); diff --git a/target/ppc/power8-pmu-regs.c.inc b/target/ppc/power8-pmu-regs.c.inc index 4956a8b350..652cf20704 100644 --- a/target/ppc/power8-pmu-regs.c.inc +++ b/target/ppc/power8-pmu-regs.c.inc @@ -175,6 +175,11 @@ void spr_write_MMCR2_ureg(DisasContext *ctx, int sprn, int gprn) gen_store_spr(SPR_POWER_MMCR2, masked_gprn); } +void spr_write_MMCRA(DisasContext *ctx, int sprn, int gprn) +{ + gen_helper_store_mmcrA(tcg_env, cpu_gpr[gprn]); +} + void spr_read_PMC(DisasContext *ctx, int gprn, int sprn) { TCGv_i32 t_sprn = tcg_constant_i32(sprn); diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c index cbc5889d91..db9ee8e96b 100644 --- a/target/ppc/power8-pmu.c +++ b/target/ppc/power8-pmu.c @@ -82,7 +82,38 @@ static void pmu_update_summaries(CPUPPCState *env) env->pmc_cyc_cnt = cyc_cnt; } -void pmu_mmcr01_updated(CPUPPCState *env) +static void hreg_bhrb_filter_update(CPUPPCState *env) +{ + target_long ifm; + + if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) { + /* disable recording to BHRB */ + env->bhrb_filter = BHRB_TYPE_NORECORD; + return; + } + + ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> MMCRA_IFM_SHIFT; + switch (ifm) { + case 0: + /* record all branches */ + env->bhrb_filter = -1; + break; + case 1: + /* only record calls (LK = 1) */ + env->bhrb_filter = BHRB_TYPE_CALL; + break; + case 2: + /* only record indirect branches */ + env->bhrb_filter = BHRB_TYPE_INDIRECT; + break; + case 3: + /* only record conditional branches */ + env->bhrb_filter = BHRB_TYPE_COND; + break; + } +} + +void pmu_mmcr01a_updated(CPUPPCState *env) { PowerPCCPU *cpu = env_archcpu(env); @@ -95,6 +126,8 @@ void pmu_mmcr01_updated(CPUPPCState *env) ppc_set_irq(cpu, PPC_INTERRUPT_PERFM, 0); } + hreg_bhrb_filter_update(env); + /* * Should this update overflow timers (if mmcr0 is updated) so they * get set in cpu_post_load? 
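The new hreg_bhrb_filter_update() above recomputes the BHRB filter whenever pmu_mmcr01a_updated() runs, from MMCR0[PMAE] and the MMCRA[IFM] field. A condensed sketch of that mapping, using the BHRB_TYPE_* flags added to power8-pmu.h below (the table-lookup helper itself is illustrative, not part of the patch):

/* Sketch: IFM = 0 records all branches, 1 only calls (LK = 1), 2 only
 * indirect branches, 3 only conditional branches; with MMCR0[PMAE] clear
 * nothing is recorded at all. */
static int bhrb_filter_for(bool pmae, unsigned ifm)
{
    static const int ifm_to_filter[4] = {
        -1,                  /* record every branch type */
        BHRB_TYPE_CALL,
        BHRB_TYPE_INDIRECT,
        BHRB_TYPE_COND,
    };

    return pmae ? ifm_to_filter[ifm & 3] : BHRB_TYPE_NORECORD;
}

Routing the decision through pmu_mmcr01a_updated() means MMCR0 and MMCRA writes refresh the filter on the same path, which is why helper_store_mmcrA() in the next hunk only stores the SPR and calls pmu_mmcr01a_updated().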
@@ -260,7 +293,7 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong value) env->spr[SPR_POWER_MMCR0] = value; - pmu_mmcr01_updated(env); + pmu_mmcr01a_updated(env); /* Update cycle overflow timers with the current MMCR0 state */ pmu_update_overflow_timers(env); @@ -272,7 +305,14 @@ void helper_store_mmcr1(CPUPPCState *env, uint64_t value) env->spr[SPR_POWER_MMCR1] = value; - pmu_mmcr01_updated(env); + pmu_mmcr01a_updated(env); +} + +void helper_store_mmcrA(CPUPPCState *env, uint64_t value) +{ + env->spr[SPR_POWER_MMCRA] = value; + + pmu_mmcr01a_updated(env); } target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn) @@ -301,7 +341,7 @@ static void perfm_alert(PowerPCCPU *cpu) env->spr[SPR_POWER_MMCR0] |= MMCR0_FC; /* Changing MMCR0_FC requires summaries and hflags update */ - pmu_mmcr01_updated(env); + pmu_mmcr01a_updated(env); /* * Delete all pending timers if we need to freeze diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h index 775e640053..3f79cfc45b 100644 --- a/target/ppc/power8-pmu.h +++ b/target/ppc/power8-pmu.h @@ -13,15 +13,22 @@ #ifndef POWER8_PMU_H #define POWER8_PMU_H +#define BHRB_TYPE_NORECORD 0x00 +#define BHRB_TYPE_CALL 0x01 +#define BHRB_TYPE_INDIRECT 0x02 +#define BHRB_TYPE_COND 0x04 +#define BHRB_TYPE_OTHER 0x08 +#define BHRB_TYPE_XL_FORM 0x10 + #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL void cpu_ppc_pmu_init(CPUPPCState *env); -void pmu_mmcr01_updated(CPUPPCState *env); +void pmu_mmcr01a_updated(CPUPPCState *env); #else static inline void cpu_ppc_pmu_init(CPUPPCState *env) { } -static inline void pmu_mmcr01_updated(CPUPPCState *env) { } +static inline void pmu_mmcr01a_updated(CPUPPCState *env) { } #endif #endif diff --git a/target/ppc/spr_common.h b/target/ppc/spr_common.h index 8a9d6cd994..01aff449bc 100644 --- a/target/ppc/spr_common.h +++ b/target/ppc/spr_common.h @@ -83,8 +83,11 @@ void spr_read_generic(DisasContext *ctx, int gprn, int sprn); void spr_write_generic(DisasContext *ctx, int sprn, int gprn); void spr_write_generic32(DisasContext *ctx, int sprn, int gprn); void spr_core_write_generic(DisasContext *ctx, int sprn, int gprn); +void spr_core_write_generic32(DisasContext *ctx, int sprn, int gprn); +void spr_core_lpar_write_generic(DisasContext *ctx, int sprn, int gprn); void spr_write_MMCR0(DisasContext *ctx, int sprn, int gprn); void spr_write_MMCR1(DisasContext *ctx, int sprn, int gprn); +void spr_write_MMCRA(DisasContext *ctx, int sprn, int gprn); void spr_write_PMC(DisasContext *ctx, int sprn, int gprn); void spr_write_CTRL(DisasContext *ctx, int sprn, int gprn); void spr_read_xer(DisasContext *ctx, int gprn, int sprn); @@ -202,6 +205,11 @@ void spr_read_tfmr(DisasContext *ctx, int gprn, int sprn); void spr_write_tfmr(DisasContext *ctx, int sprn, int gprn); void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn); void spr_read_dexcr_ureg(DisasContext *ctx, int gprn, int sprn); +void spr_read_ppr32(DisasContext *ctx, int sprn, int gprn); +void spr_write_ppr32(DisasContext *ctx, int sprn, int gprn); +void spr_write_sprc(DisasContext *ctx, int sprn, int gprn); +void spr_read_sprd(DisasContext *ctx, int sprn, int gprn); +void spr_write_sprd(DisasContext *ctx, int sprn, int gprn); #endif void register_low_BATs(CPUPPCState *env); diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 93ffec787c..0bc16d7251 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "cpu.h" #include "internal.h" -#include 
"disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" @@ -181,6 +180,7 @@ struct DisasContext { #if defined(TARGET_PPC64) bool sf_mode; bool has_cfar; + bool has_bhrb; #endif bool fpu_enabled; bool altivec_enabled; @@ -194,6 +194,7 @@ struct DisasContext { bool mmcr0_pmcjce; bool pmc_other; bool pmu_insn_cnt; + bool bhrb_enable; ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */ int singlestep_enabled; uint32_t flags; @@ -472,6 +473,34 @@ void spr_core_write_generic(DisasContext *ctx, int sprn, int gprn) spr_store_dump_spr(sprn); } +void spr_core_write_generic32(DisasContext *ctx, int sprn, int gprn) +{ + TCGv t0; + + if (!(ctx->flags & POWERPC_FLAG_SMT)) { + spr_write_generic32(ctx, sprn, gprn); + return; + } + + if (!gen_serialize(ctx)) { + return; + } + + t0 = tcg_temp_new(); + tcg_gen_ext32u_tl(t0, cpu_gpr[gprn]); + gen_helper_spr_core_write_generic(tcg_env, tcg_constant_i32(sprn), t0); + spr_store_dump_spr(sprn); +} + +void spr_core_lpar_write_generic(DisasContext *ctx, int sprn, int gprn) +{ + if (ctx->flags & POWERPC_FLAG_SMT_1LPAR) { + spr_core_write_generic(ctx, sprn, gprn); + } else { + spr_write_generic(ctx, sprn, gprn); + } +} + static void spr_write_CTRL_ST(DisasContext *ctx, int sprn, int gprn) { /* This does not implement >1 thread */ @@ -880,6 +909,10 @@ void spr_write_hior(DisasContext *ctx, int sprn, int gprn) } void spr_write_ptcr(DisasContext *ctx, int sprn, int gprn) { + if (!gen_serialize_core(ctx)) { + return; + } + gen_helper_store_ptcr(tcg_env, cpu_gpr[gprn]); } @@ -1268,6 +1301,24 @@ void spr_write_tfmr(DisasContext *ctx, int sprn, int gprn) gen_helper_store_tfmr(tcg_env, cpu_gpr[gprn]); } +void spr_write_sprc(DisasContext *ctx, int sprn, int gprn) +{ + gen_helper_store_sprc(tcg_env, cpu_gpr[gprn]); +} + +void spr_read_sprd(DisasContext *ctx, int gprn, int sprn) +{ + gen_helper_load_sprd(cpu_gpr[gprn], tcg_env); +} + +void spr_write_sprd(DisasContext *ctx, int sprn, int gprn) +{ + if (!gen_serialize_core(ctx)) { + return; + } + gen_helper_store_sprd(tcg_env, cpu_gpr[gprn]); +} + void spr_write_lpcr(DisasContext *ctx, int sprn, int gprn) { translator_io_start(&ctx->base); @@ -1351,6 +1402,30 @@ void spr_read_dexcr_ureg(DisasContext *ctx, int gprn, int sprn) gen_load_spr(t0, sprn + 16); tcg_gen_ext32u_tl(cpu_gpr[gprn], t0); } + +/* The PPR32 SPR accesses the upper 32-bits of PPR */ +void spr_read_ppr32(DisasContext *ctx, int gprn, int sprn) +{ + gen_load_spr(cpu_gpr[gprn], SPR_PPR); + tcg_gen_shri_tl(cpu_gpr[gprn], cpu_gpr[gprn], 32); + spr_load_dump_spr(SPR_PPR); +} + +void spr_write_ppr32(DisasContext *ctx, int sprn, int gprn) +{ + TCGv t0 = tcg_temp_new(); + + /* + * Don't clobber the low 32-bits of the PPR. These are all reserved bits + * but TCG does implement them, so it would be surprising to zero them + * here. "Priority nops" are similarly careful not to clobber reserved + * bits. 
+ */ + gen_load_spr(t0, SPR_PPR); + tcg_gen_deposit_tl(t0, t0, cpu_gpr[gprn], 32, 32); + gen_store_spr(SPR_PPR, t0); + spr_store_dump_spr(SPR_PPR); +} #endif #define GEN_HANDLER(name, opc1, opc2, opc3, inval, type) \ @@ -1564,73 +1639,6 @@ static inline void gen_set_Rc0(DisasContext *ctx, TCGv reg) } } -/* cmprb - range comparison: isupper, isaplha, islower*/ -static void gen_cmprb(DisasContext *ctx) -{ - TCGv_i32 src1 = tcg_temp_new_i32(); - TCGv_i32 src2 = tcg_temp_new_i32(); - TCGv_i32 src2lo = tcg_temp_new_i32(); - TCGv_i32 src2hi = tcg_temp_new_i32(); - TCGv_i32 crf = cpu_crf[crfD(ctx->opcode)]; - - tcg_gen_trunc_tl_i32(src1, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_trunc_tl_i32(src2, cpu_gpr[rB(ctx->opcode)]); - - tcg_gen_andi_i32(src1, src1, 0xFF); - tcg_gen_ext8u_i32(src2lo, src2); - tcg_gen_shri_i32(src2, src2, 8); - tcg_gen_ext8u_i32(src2hi, src2); - - tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); - tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); - tcg_gen_and_i32(crf, src2lo, src2hi); - - if (ctx->opcode & 0x00200000) { - tcg_gen_shri_i32(src2, src2, 8); - tcg_gen_ext8u_i32(src2lo, src2); - tcg_gen_shri_i32(src2, src2, 8); - tcg_gen_ext8u_i32(src2hi, src2); - tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); - tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); - tcg_gen_and_i32(src2lo, src2lo, src2hi); - tcg_gen_or_i32(crf, crf, src2lo); - } - tcg_gen_shli_i32(crf, crf, CRF_GT_BIT); -} - -#if defined(TARGET_PPC64) -/* cmpeqb */ -static void gen_cmpeqb(DisasContext *ctx) -{ - gen_helper_cmpeqb(cpu_crf[crfD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -} -#endif - -/* isel (PowerPC 2.03 specification) */ -static void gen_isel(DisasContext *ctx) -{ - uint32_t bi = rC(ctx->opcode); - uint32_t mask = 0x08 >> (bi & 0x03); - TCGv t0 = tcg_temp_new(); - TCGv zr; - - tcg_gen_extu_i32_tl(t0, cpu_crf[bi >> 2]); - tcg_gen_andi_tl(t0, t0, mask); - - zr = tcg_constant_tl(0); - tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx->opcode)], t0, zr, - rA(ctx->opcode) ? cpu_gpr[rA(ctx->opcode)] : zr, - cpu_gpr[rB(ctx->opcode)]); -} - -/* cmpb: PowerPC 2.05 specification */ -static void gen_cmpb(DisasContext *ctx) -{ - gen_helper_cmpb(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -} - /*** Integer arithmetic ***/ static inline void gen_op_arith_compute_ov(DisasContext *ctx, TCGv arg0, @@ -1738,8 +1746,9 @@ static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1, } } -static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i32 t0 = tcg_temp_new_i32(); TCGv_i32 t1 = tcg_temp_new_i32(); @@ -1773,45 +1782,15 @@ static inline void gen_op_arith_divw(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); } - if (unlikely(Rc(ctx->opcode) != 0)) { + if (unlikely(compute_rc0)) { gen_set_Rc0(ctx, ret); } } -/* Div functions */ -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - gen_op_arith_divw(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov); \ -} -/* divwu divwu. divwuo divwuo. */ -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0); -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1); -/* divw divw. divwo divwo. 
*/ -GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0); -GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1); - -/* div[wd]eu[o][.] */ -#define GEN_DIVE(name, hlpr, compute_ov) \ -static void gen_##name(DisasContext *ctx) \ -{ \ - TCGv_i32 t0 = tcg_constant_i32(compute_ov); \ - gen_helper_##hlpr(cpu_gpr[rD(ctx->opcode)], tcg_env, \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); \ - } \ -} - -GEN_DIVE(divweu, divweu, 0); -GEN_DIVE(divweuo, divweu, 1); -GEN_DIVE(divwe, divwe, 0); -GEN_DIVE(divweo, divwe, 1); #if defined(TARGET_PPC64) -static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, - TCGv arg2, int sign, int compute_ov) +static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, + TCGv arg1, TCGv arg2, bool sign, + bool compute_ov, bool compute_rc0) { TCGv_i64 t0 = tcg_temp_new_i64(); TCGv_i64 t1 = tcg_temp_new_i64(); @@ -1847,25 +1826,6 @@ static inline void gen_op_arith_divd(DisasContext *ctx, TCGv ret, TCGv arg1, gen_set_Rc0(ctx, ret); } } - -#define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - gen_op_arith_divd(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign, compute_ov); \ -} -/* divdu divdu. divduo divduo. */ -GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0); -GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1); -/* divd divd. divdo divdo. */ -GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0); -GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1); - -GEN_DIVE(divdeu, divdeu, 0); -GEN_DIVE(divdeuo, divdeu, 1); -GEN_DIVE(divde, divde, 0); -GEN_DIVE(divdeo, divde, 1); #endif static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, @@ -1897,17 +1857,6 @@ static inline void gen_op_arith_modw(DisasContext *ctx, TCGv ret, TCGv arg1, } } -#define GEN_INT_ARITH_MODW(name, opc3, sign) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - gen_op_arith_modw(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign); \ -} - -GEN_INT_ARITH_MODW(moduw, 0x08, 0); -GEN_INT_ARITH_MODW(modsw, 0x18, 1); - #if defined(TARGET_PPC64) static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, TCGv arg2, int sign) @@ -1935,157 +1884,6 @@ static inline void gen_op_arith_modd(DisasContext *ctx, TCGv ret, TCGv arg1, tcg_gen_remu_i64(ret, t0, t1); } } - -#define GEN_INT_ARITH_MODD(name, opc3, sign) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - gen_op_arith_modd(ctx, cpu_gpr[rD(ctx->opcode)], \ - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], \ - sign); \ -} - -GEN_INT_ARITH_MODD(modud, 0x08, 0); -GEN_INT_ARITH_MODD(modsd, 0x18, 1); -#endif - -/* mulhw mulhw. */ -static void gen_mulhw(DisasContext *ctx) -{ - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv_i32 t1 = tcg_temp_new_i32(); - - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_muls2_i32(t0, t1, t0, t1); - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mulhwu mulhwu. 
*/ -static void gen_mulhwu(DisasContext *ctx) -{ - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv_i32 t1 = tcg_temp_new_i32(); - - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_mulu2_i32(t0, t1, t0, t1); - tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx->opcode)], t1); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mullw mullw. */ -static void gen_mullw(DisasContext *ctx) -{ -#if defined(TARGET_PPC64) - TCGv_i64 t0, t1; - t0 = tcg_temp_new_i64(); - t1 = tcg_temp_new_i64(); - tcg_gen_ext32s_tl(t0, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_ext32s_tl(t1, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_mul_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else - tcg_gen_mul_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); -#endif - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mullwo mullwo. */ -static void gen_mullwo(DisasContext *ctx) -{ - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv_i32 t1 = tcg_temp_new_i32(); - - tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); - tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); - tcg_gen_muls2_i32(t0, t1, t0, t1); -#if defined(TARGET_PPC64) - tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1); -#else - tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0); -#endif - - tcg_gen_sari_i32(t0, t0, 31); - tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t1); - tcg_gen_extu_i32_tl(cpu_ov, t0); - if (is_isa300(ctx)) { - tcg_gen_mov_tl(cpu_ov32, cpu_ov); - } - tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mulli */ -static void gen_mulli(DisasContext *ctx) -{ - tcg_gen_muli_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - SIMM(ctx->opcode)); -} - -#if defined(TARGET_PPC64) -/* mulhd mulhd. */ -static void gen_mulhd(DisasContext *ctx) -{ - TCGv lo = tcg_temp_new(); - tcg_gen_muls2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mulhdu mulhdu. */ -static void gen_mulhdu(DisasContext *ctx) -{ - TCGv lo = tcg_temp_new(); - tcg_gen_mulu2_tl(lo, cpu_gpr[rD(ctx->opcode)], - cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mulld mulld. */ -static void gen_mulld(DisasContext *ctx) -{ - tcg_gen_mul_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -/* mulldo mulldo. */ -static void gen_mulldo(DisasContext *ctx) -{ - TCGv_i64 t0 = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); - - tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); - tcg_gen_mov_i64(cpu_gpr[rD(ctx->opcode)], t0); - - tcg_gen_sari_i64(t0, t0, 63); - tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); - if (is_isa300(ctx)) { - tcg_gen_mov_tl(cpu_ov32, cpu_ov); - } - tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); - - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} #endif /* Common subf function */ @@ -2158,104 +1956,7 @@ static inline void gen_op_arith_subf(DisasContext *ctx, TCGv ret, TCGv arg1, } } -/* neg neg. nego nego. 
*/ -static inline void gen_op_arith_neg(DisasContext *ctx, bool compute_ov) -{ - TCGv zero = tcg_constant_tl(0); - gen_op_arith_subf(ctx, cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], - zero, 0, 0, compute_ov, Rc(ctx->opcode)); -} - -static void gen_neg(DisasContext *ctx) -{ - tcg_gen_neg_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); - if (unlikely(Rc(ctx->opcode))) { - gen_set_Rc0(ctx, cpu_gpr[rD(ctx->opcode)]); - } -} - -static void gen_nego(DisasContext *ctx) -{ - gen_op_arith_neg(ctx, 1); -} - /*** Integer logical ***/ -#define GEN_LOGICAL2(name, tcg_op, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - tcg_op(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], \ - cpu_gpr[rB(ctx->opcode)]); \ - if (unlikely(Rc(ctx->opcode) != 0)) \ - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); \ -} - -#define GEN_LOGICAL1(name, tcg_op, opc, type) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - tcg_op(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); \ - if (unlikely(Rc(ctx->opcode) != 0)) \ - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); \ -} - -/* and & and. */ -GEN_LOGICAL2(and, tcg_gen_and_tl, 0x00, PPC_INTEGER); -/* andc & andc. */ -GEN_LOGICAL2(andc, tcg_gen_andc_tl, 0x01, PPC_INTEGER); - -/* andi. */ -static void gen_andi_(DisasContext *ctx) -{ - tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], - UIMM(ctx->opcode)); - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); -} - -/* andis. */ -static void gen_andis_(DisasContext *ctx) -{ - tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], - UIMM(ctx->opcode) << 16); - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); -} - -/* cntlzw */ -static void gen_cntlzw(DisasContext *ctx) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - tcg_gen_trunc_tl_i32(t, cpu_gpr[rS(ctx->opcode)]); - tcg_gen_clzi_i32(t, t, 32); - tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t); - - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); - } -} - -/* cnttzw */ -static void gen_cnttzw(DisasContext *ctx) -{ - TCGv_i32 t = tcg_temp_new_i32(); - - tcg_gen_trunc_tl_i32(t, cpu_gpr[rS(ctx->opcode)]); - tcg_gen_ctzi_i32(t, t, 32); - tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx->opcode)], t); - - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); - } -} - -/* eqv & eqv. */ -GEN_LOGICAL2(eqv, tcg_gen_eqv_tl, 0x08, PPC_INTEGER); -/* extsb & extsb. */ -GEN_LOGICAL1(extsb, tcg_gen_ext8s_tl, 0x1D, PPC_INTEGER); -/* extsh & extsh. */ -GEN_LOGICAL1(extsh, tcg_gen_ext16s_tl, 0x1C, PPC_INTEGER); -/* nand & nand. */ -GEN_LOGICAL2(nand, tcg_gen_nand_tl, 0x0E, PPC_INTEGER); -/* nor & nor. */ -GEN_LOGICAL2(nor, tcg_gen_nor_tl, 0x03, PPC_INTEGER); #if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) static void gen_pause(DisasContext *ctx) @@ -2269,261 +1970,6 @@ static void gen_pause(DisasContext *ctx) } #endif /* defined(TARGET_PPC64) */ -/* or & or. */ -static void gen_or(DisasContext *ctx) -{ - int rs, ra, rb; - - rs = rS(ctx->opcode); - ra = rA(ctx->opcode); - rb = rB(ctx->opcode); - /* Optimisation for mr. 
ri case */ - if (rs != ra || rs != rb) { - if (rs != rb) { - tcg_gen_or_tl(cpu_gpr[ra], cpu_gpr[rs], cpu_gpr[rb]); - } else { - tcg_gen_mov_tl(cpu_gpr[ra], cpu_gpr[rs]); - } - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[ra]); - } - } else if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rs]); -#if defined(TARGET_PPC64) - } else if (rs != 0) { /* 0 is nop */ - int prio = 0; - - switch (rs) { - case 1: - /* Set process priority to low */ - prio = 2; - break; - case 6: - /* Set process priority to medium-low */ - prio = 3; - break; - case 2: - /* Set process priority to normal */ - prio = 4; - break; -#if !defined(CONFIG_USER_ONLY) - case 31: - if (!ctx->pr) { - /* Set process priority to very low */ - prio = 1; - } - break; - case 5: - if (!ctx->pr) { - /* Set process priority to medium-hight */ - prio = 5; - } - break; - case 3: - if (!ctx->pr) { - /* Set process priority to high */ - prio = 6; - } - break; - case 7: - if (ctx->hv && !ctx->pr) { - /* Set process priority to very high */ - prio = 7; - } - break; -#endif - default: - break; - } - if (prio) { - TCGv t0 = tcg_temp_new(); - gen_load_spr(t0, SPR_PPR); - tcg_gen_andi_tl(t0, t0, ~0x001C000000000000ULL); - tcg_gen_ori_tl(t0, t0, ((uint64_t)prio) << 50); - gen_store_spr(SPR_PPR, t0); - } -#if !defined(CONFIG_USER_ONLY) - /* - * Pause out of TCG otherwise spin loops with smt_low eat too - * much CPU and the kernel hangs. This applies to all - * encodings other than no-op, e.g., miso(rs=26), yield(27), - * mdoio(29), mdoom(30), and all currently undefined. - */ - gen_pause(ctx); -#endif -#endif - } -} -/* orc & orc. */ -GEN_LOGICAL2(orc, tcg_gen_orc_tl, 0x0C, PPC_INTEGER); - -/* xor & xor. */ -static void gen_xor(DisasContext *ctx) -{ - /* Optimisation for "set to zero" case */ - if (rS(ctx->opcode) != rB(ctx->opcode)) { - tcg_gen_xor_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); - } else { - tcg_gen_movi_tl(cpu_gpr[rA(ctx->opcode)], 0); - } - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); - } -} - -/* ori */ -static void gen_ori(DisasContext *ctx) -{ - target_ulong uimm = UIMM(ctx->opcode); - - if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) { - return; - } - tcg_gen_ori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], uimm); -} - -/* oris */ -static void gen_oris(DisasContext *ctx) -{ - target_ulong uimm = UIMM(ctx->opcode); - - if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) { - /* NOP */ - return; - } - tcg_gen_ori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], - uimm << 16); -} - -/* xori */ -static void gen_xori(DisasContext *ctx) -{ - target_ulong uimm = UIMM(ctx->opcode); - - if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) { - /* NOP */ - return; - } - tcg_gen_xori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], uimm); -} - -/* xoris */ -static void gen_xoris(DisasContext *ctx) -{ - target_ulong uimm = UIMM(ctx->opcode); - - if (rS(ctx->opcode) == rA(ctx->opcode) && uimm == 0) { - /* NOP */ - return; - } - tcg_gen_xori_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], - uimm << 16); -} - -/* popcntb : PowerPC 2.03 specification */ -static void gen_popcntb(DisasContext *ctx) -{ - gen_helper_popcntb(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); -} - -static void gen_popcntw(DisasContext *ctx) -{ -#if defined(TARGET_PPC64) - gen_helper_popcntw(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); -#else - tcg_gen_ctpop_i32(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); 
-#endif -} - -#if defined(TARGET_PPC64) -/* popcntd: PowerPC 2.06 specification */ -static void gen_popcntd(DisasContext *ctx) -{ - tcg_gen_ctpop_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)]); -} -#endif - -/* prtyw: PowerPC 2.05 specification */ -static void gen_prtyw(DisasContext *ctx) -{ - TCGv ra = cpu_gpr[rA(ctx->opcode)]; - TCGv rs = cpu_gpr[rS(ctx->opcode)]; - TCGv t0 = tcg_temp_new(); - tcg_gen_shri_tl(t0, rs, 16); - tcg_gen_xor_tl(ra, rs, t0); - tcg_gen_shri_tl(t0, ra, 8); - tcg_gen_xor_tl(ra, ra, t0); - tcg_gen_andi_tl(ra, ra, (target_ulong)0x100000001ULL); -} - -#if defined(TARGET_PPC64) -/* prtyd: PowerPC 2.05 specification */ -static void gen_prtyd(DisasContext *ctx) -{ - TCGv ra = cpu_gpr[rA(ctx->opcode)]; - TCGv rs = cpu_gpr[rS(ctx->opcode)]; - TCGv t0 = tcg_temp_new(); - tcg_gen_shri_tl(t0, rs, 32); - tcg_gen_xor_tl(ra, rs, t0); - tcg_gen_shri_tl(t0, ra, 16); - tcg_gen_xor_tl(ra, ra, t0); - tcg_gen_shri_tl(t0, ra, 8); - tcg_gen_xor_tl(ra, ra, t0); - tcg_gen_andi_tl(ra, ra, 1); -} -#endif - -#if defined(TARGET_PPC64) -/* bpermd */ -static void gen_bpermd(DisasContext *ctx) -{ - gen_helper_bpermd(cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); -} -#endif - -#if defined(TARGET_PPC64) -/* extsw & extsw. */ -GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B); - -/* cntlzd */ -static void gen_cntlzd(DisasContext *ctx) -{ - tcg_gen_clzi_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], 64); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); - } -} - -/* cnttzd */ -static void gen_cnttzd(DisasContext *ctx) -{ - tcg_gen_ctzi_i64(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rS(ctx->opcode)], 64); - if (unlikely(Rc(ctx->opcode) != 0)) { - gen_set_Rc0(ctx, cpu_gpr[rA(ctx->opcode)]); - } -} - -/* darn */ -static void gen_darn(DisasContext *ctx) -{ - int l = L(ctx->opcode); - - if (l > 2) { - tcg_gen_movi_i64(cpu_gpr[rD(ctx->opcode)], -1); - } else { - translator_io_start(&ctx->base); - if (l == 0) { - gen_helper_darn32(cpu_gpr[rD(ctx->opcode)]); - } else { - /* Return 64-bit random for both CRN and RRN */ - gen_helper_darn64(cpu_gpr[rD(ctx->opcode)]); - } - } -} -#endif - /*** Integer rotate ***/ /* rlwimi & rlwimi. */ @@ -3423,59 +2869,6 @@ static void gen_stswx(DisasContext *ctx) gen_helper_stsw(tcg_env, t0, t1, t2); } -/*** Memory synchronisation ***/ -/* eieio */ -static void gen_eieio(DisasContext *ctx) -{ - TCGBar bar = TCG_MO_ALL; - - /* - * eieio has complex semanitcs. It provides memory ordering between - * operations in the set: - * - loads from CI memory. - * - stores to CI memory. - * - stores to WT memory. - * - * It separately also orders memory for operations in the set: - * - stores to cacheble memory. - * - * It also serializes instructions: - * - dcbt and dcbst. - * - * It separately serializes: - * - tlbie and tlbsync. - * - * And separately serializes: - * - slbieg, slbiag, and slbsync. - * - * The end result is that CI memory ordering requires TCG_MO_ALL - * and it is not possible to special-case more relaxed ordering for - * cacheable accesses. TCG_BAR_SC is required to provide this - * serialization. - */ - - /* - * POWER9 has a eieio instruction variant using bit 6 as a hint to - * tell the CPU it is a store-forwarding barrier. - */ - if (ctx->opcode & 0x2000000) { - /* - * ISA says that "Reserved fields in instructions are ignored - * by the processor". So ignore the bit 6 on non-POWER9 CPU but - * as this is not an instruction software should be using, - * complain to the user. 
- */ - if (!(ctx->insns_flags2 & PPC2_ISA300)) { - qemu_log_mask(LOG_GUEST_ERROR, "invalid eieio using bit 6 at @" - TARGET_FMT_lx "\n", ctx->cia); - } else { - bar = TCG_MO_ST_LD; - } - } - - tcg_gen_mb(bar | TCG_BAR_SC); -} - #if !defined(CONFIG_USER_ONLY) static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) { @@ -3495,6 +2888,13 @@ static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) gen_helper_check_tlb_flush_local(tcg_env); } gen_set_label(l); + if (global) { + /* + * Global TLB flush uses async-work which must run before the + * next instruction, so this must be the last in the TB. + */ + ctx->base.is_jmp = DISAS_EXIT_UPDATE; + } } #else static inline void gen_check_tlb_flush(DisasContext *ctx, bool global) { } @@ -3514,8 +2914,6 @@ static void gen_isync(DisasContext *ctx) ctx->base.is_jmp = DISAS_EXIT_UPDATE; } -#define MEMOP_GET_SIZE(x) (1 << ((x) & MO_SIZE)) - static void gen_load_locked(DisasContext *ctx, MemOp memop) { TCGv gpr = cpu_gpr[rD(ctx->opcode)]; @@ -3523,7 +2921,7 @@ static void gen_load_locked(DisasContext *ctx, MemOp memop) gen_set_access_type(ctx, ACCESS_RES); gen_addr_reg_index(ctx, t0); - tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, memop | MO_ALIGN); + tcg_gen_qemu_ld_tl(gpr, t0, ctx->mem_idx, DEF_MEMOP(memop) | MO_ALIGN); tcg_gen_mov_tl(cpu_reserve, t0); tcg_gen_movi_tl(cpu_reserve_length, memop_size(memop)); tcg_gen_mov_tl(cpu_reserve_val, gpr); @@ -3536,9 +2934,9 @@ static void gen_##name(DisasContext *ctx) \ } /* lwarx */ -LARX(lbarx, DEF_MEMOP(MO_UB)) -LARX(lharx, DEF_MEMOP(MO_UW)) -LARX(lwarx, DEF_MEMOP(MO_UL)) +LARX(lbarx, MO_UB) +LARX(lharx, MO_UW) +LARX(lwarx, MO_UL) static void gen_fetch_inc_conditional(DisasContext *ctx, MemOp memop, TCGv EA, TCGCond cond, int addend) @@ -3548,7 +2946,7 @@ static void gen_fetch_inc_conditional(DisasContext *ctx, MemOp memop, TCGv u = tcg_temp_new(); tcg_gen_qemu_ld_tl(t, EA, ctx->mem_idx, memop); - tcg_gen_addi_tl(t2, EA, MEMOP_GET_SIZE(memop)); + tcg_gen_addi_tl(t2, EA, memop_size(memop)); tcg_gen_qemu_ld_tl(t2, t2, ctx->mem_idx, memop); tcg_gen_addi_tl(u, t, addend); @@ -3558,7 +2956,7 @@ static void gen_fetch_inc_conditional(DisasContext *ctx, MemOp memop, tcg_gen_qemu_st_tl(u, EA, ctx->mem_idx, memop); /* RT = (t != t2 ? t : u = 1<<(s*8-1)) */ - tcg_gen_movi_tl(u, 1 << (MEMOP_GET_SIZE(memop) * 8 - 1)); + tcg_gen_movi_tl(u, 1 << (memop_size(memop) * 8 - 1)); tcg_gen_movcond_tl(cond, cpu_gpr[rD(ctx->opcode)], t, t2, t, u); } @@ -3720,7 +3118,7 @@ static void gen_st_atomic(DisasContext *ctx, MemOp memop) TCGv ea_plus_s = tcg_temp_new(); tcg_gen_qemu_ld_tl(t, EA, ctx->mem_idx, memop); - tcg_gen_addi_tl(ea_plus_s, EA, MEMOP_GET_SIZE(memop)); + tcg_gen_addi_tl(ea_plus_s, EA, memop_size(memop)); tcg_gen_qemu_ld_tl(t2, ea_plus_s, ctx->mem_idx, memop); tcg_gen_movcond_tl(TCG_COND_EQ, s, t, t2, src, t); tcg_gen_movcond_tl(TCG_COND_EQ, s2, t, t2, src, t2); @@ -3783,15 +3181,15 @@ static void gen_##name(DisasContext *ctx) \ gen_conditional_store(ctx, memop); \ } -STCX(stbcx_, DEF_MEMOP(MO_UB)) -STCX(sthcx_, DEF_MEMOP(MO_UW)) -STCX(stwcx_, DEF_MEMOP(MO_UL)) +STCX(stbcx_, MO_UB) +STCX(sthcx_, MO_UW) +STCX(stwcx_, MO_UL) #if defined(TARGET_PPC64) /* ldarx */ -LARX(ldarx, DEF_MEMOP(MO_UQ)) +LARX(ldarx, MO_UQ) /* stdcx. 
*/ -STCX(stdcx_, DEF_MEMOP(MO_UQ)) +STCX(stdcx_, MO_UQ) /* lqarx */ static void gen_lqarx(DisasContext *ctx) @@ -3877,31 +3275,6 @@ static void gen_stqcx_(DisasContext *ctx) } #endif /* defined(TARGET_PPC64) */ -/* sync */ -static void gen_sync(DisasContext *ctx) -{ - TCGBar bar = TCG_MO_ALL; - uint32_t l = (ctx->opcode >> 21) & 3; - - if ((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) { - bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; - } - - /* - * We may need to check for a pending TLB flush. - * - * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32. - * - * Additionally, this can only happen in kernel mode however so - * check MSR_PR as well. - */ - if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { - gen_check_tlb_flush(ctx, true); - } - - tcg_gen_mb(bar | TCG_BAR_SC); -} - /* wait */ static void gen_wait(DisasContext *ctx) { @@ -4071,14 +3444,85 @@ static void gen_rvwinkle(DisasContext *ctx) gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next); #endif /* defined(CONFIG_USER_ONLY) */ } + +static inline TCGv gen_write_bhrb(TCGv_ptr base, TCGv offset, TCGv mask, TCGv value) +{ + TCGv_ptr tmp = tcg_temp_new_ptr(); + + /* add base and offset to get address of bhrb entry */ + tcg_gen_add_ptr(tmp, base, (TCGv_ptr)offset); + + /* store value into bhrb at bhrb_offset */ + tcg_gen_st_i64(value, tmp, 0); + + /* add 8 to current bhrb_offset */ + tcg_gen_addi_tl(offset, offset, 8); + + /* apply offset mask */ + tcg_gen_and_tl(offset, offset, mask); + + return offset; +} #endif /* #if defined(TARGET_PPC64) */ -static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip) +static inline void gen_update_branch_history(DisasContext *ctx, + target_ulong nip, + TCGv target, + target_long inst_type) { #if defined(TARGET_PPC64) + TCGv_ptr base; + TCGv tmp; + TCGv offset; + TCGv mask; + TCGLabel *no_update; + if (ctx->has_cfar) { tcg_gen_movi_tl(cpu_cfar, nip); } + + if (!ctx->has_bhrb || + !ctx->bhrb_enable || + inst_type == BHRB_TYPE_NORECORD) { + return; + } + + tmp = tcg_temp_new(); + no_update = gen_new_label(); + + /* check for bhrb filtering */ + tcg_gen_ld_tl(tmp, tcg_env, offsetof(CPUPPCState, bhrb_filter)); + tcg_gen_andi_tl(tmp, tmp, inst_type); + tcg_gen_brcondi_tl(TCG_COND_EQ, tmp, 0, no_update); + + base = tcg_temp_new_ptr(); + offset = tcg_temp_new(); + mask = tcg_temp_new(); + + /* load bhrb base address */ + tcg_gen_ld_ptr(base, tcg_env, offsetof(CPUPPCState, bhrb_base)); + + /* load current bhrb_offset */ + tcg_gen_ld_tl(offset, tcg_env, offsetof(CPUPPCState, bhrb_offset)); + + /* load a BHRB offset mask */ + tcg_gen_ld_tl(mask, tcg_env, offsetof(CPUPPCState, bhrb_offset_mask)); + + offset = gen_write_bhrb(base, offset, mask, tcg_constant_i64(nip)); + + /* Also record the target address for XL-Form branches */ + if (inst_type & BHRB_TYPE_XL_FORM) { + + /* Set the 'T' bit for target entries */ + tcg_gen_ori_tl(tmp, target, 0x2); + + offset = gen_write_bhrb(base, offset, mask, tmp); + } + + /* save updated bhrb_offset for next time */ + tcg_gen_st_tl(offset, tcg_env, offsetof(CPUPPCState, bhrb_offset)); + + gen_set_label(no_update); #endif } @@ -4208,8 +3652,10 @@ static void gen_b(DisasContext *ctx) } if (LK(ctx->opcode)) { gen_setlr(ctx, ctx->base.pc_next); + gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_CALL); + } else { + gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_OTHER); } - gen_update_cfar(ctx, ctx->cia); gen_goto_tb(ctx, 0, target); ctx->base.is_jmp = DISAS_NORETURN; } @@ -4224,6 +3670,7 @@ static void 
gen_bcond(DisasContext *ctx, int type) uint32_t bo = BO(ctx->opcode); TCGLabel *l1; TCGv target; + target_long bhrb_type = BHRB_TYPE_OTHER; if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) { target = tcg_temp_new(); @@ -4234,11 +3681,16 @@ static void gen_bcond(DisasContext *ctx, int type) } else { tcg_gen_mov_tl(target, cpu_lr); } + if (!LK(ctx->opcode)) { + bhrb_type |= BHRB_TYPE_INDIRECT; + } + bhrb_type |= BHRB_TYPE_XL_FORM; } else { target = NULL; } if (LK(ctx->opcode)) { gen_setlr(ctx, ctx->base.pc_next); + bhrb_type |= BHRB_TYPE_CALL; } l1 = gen_new_label(); if ((bo & 0x4) == 0) { @@ -4289,6 +3741,7 @@ static void gen_bcond(DisasContext *ctx, int type) tcg_gen_brcondi_tl(TCG_COND_EQ, temp, 0, l1); } } + bhrb_type |= BHRB_TYPE_COND; } if ((bo & 0x10) == 0) { /* Test CR */ @@ -4303,8 +3756,11 @@ static void gen_bcond(DisasContext *ctx, int type) tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask); tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1); } + bhrb_type |= BHRB_TYPE_COND; } - gen_update_cfar(ctx, ctx->cia); + + gen_update_branch_history(ctx, ctx->cia, target, bhrb_type); + if (type == BCOND_IM) { target_ulong li = (target_long)((int16_t)(BD(ctx->opcode))); if (likely(AA(ctx->opcode) == 0)) { @@ -4420,7 +3876,7 @@ static void gen_rfi(DisasContext *ctx) /* Restore CPU state */ CHK_SV(ctx); translator_io_start(&ctx->base); - gen_update_cfar(ctx, ctx->cia); + gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD); gen_helper_rfi(tcg_env); ctx->base.is_jmp = DISAS_EXIT; #endif @@ -4435,7 +3891,7 @@ static void gen_rfid(DisasContext *ctx) /* Restore CPU state */ CHK_SV(ctx); translator_io_start(&ctx->base); - gen_update_cfar(ctx, ctx->cia); + gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD); gen_helper_rfid(tcg_env); ctx->base.is_jmp = DISAS_EXIT; #endif @@ -4450,7 +3906,7 @@ static void gen_rfscv(DisasContext *ctx) /* Restore CPU state */ CHK_SV(ctx); translator_io_start(&ctx->base); - gen_update_cfar(ctx, ctx->cia); + gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD); gen_helper_rfscv(tcg_env); ctx->base.is_jmp = DISAS_EXIT; #endif @@ -4508,76 +3964,20 @@ static void gen_scv(DisasContext *ctx) /*** Trap ***/ /* Check for unconditional traps (always or never) */ -static bool check_unconditional_trap(DisasContext *ctx) +static bool check_unconditional_trap(DisasContext *ctx, int to) { /* Trap never */ - if (TO(ctx->opcode) == 0) { + if (to == 0) { return true; } /* Trap always */ - if (TO(ctx->opcode) == 31) { + if (to == 31) { gen_exception_err(ctx, POWERPC_EXCP_PROGRAM, POWERPC_EXCP_TRAP); return true; } return false; } -/* tw */ -static void gen_tw(DisasContext *ctx) -{ - TCGv_i32 t0; - - if (check_unconditional_trap(ctx)) { - return; - } - t0 = tcg_constant_i32(TO(ctx->opcode)); - gen_helper_tw(tcg_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], - t0); -} - -/* twi */ -static void gen_twi(DisasContext *ctx) -{ - TCGv t0; - TCGv_i32 t1; - - if (check_unconditional_trap(ctx)) { - return; - } - t0 = tcg_constant_tl(SIMM(ctx->opcode)); - t1 = tcg_constant_i32(TO(ctx->opcode)); - gen_helper_tw(tcg_env, cpu_gpr[rA(ctx->opcode)], t0, t1); -} - -#if defined(TARGET_PPC64) -/* td */ -static void gen_td(DisasContext *ctx) -{ - TCGv_i32 t0; - - if (check_unconditional_trap(ctx)) { - return; - } - t0 = tcg_constant_i32(TO(ctx->opcode)); - gen_helper_td(tcg_env, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], - t0); -} - -/* tdi */ -static void gen_tdi(DisasContext *ctx) -{ - TCGv t0; - TCGv_i32 t1; - - if 
(check_unconditional_trap(ctx)) { - return; - } - t0 = tcg_constant_tl(SIMM(ctx->opcode)); - t1 = tcg_constant_i32(TO(ctx->opcode)); - gen_helper_td(tcg_env, cpu_gpr[rA(ctx->opcode)], t0, t1); -} -#endif - /*** Processor control ***/ /* mcrxr */ @@ -6010,23 +5410,6 @@ static void gen_dlmzb(DisasContext *ctx) cpu_gpr[rS(ctx->opcode)], cpu_gpr[rB(ctx->opcode)], t0); } -/* mbar replaces eieio on 440 */ -static void gen_mbar(DisasContext *ctx) -{ - /* interpreted as no-op */ -} - -/* msync replaces sync on 440 */ -static void gen_msync_4xx(DisasContext *ctx) -{ - /* Only e500 seems to treat reserved bits as invalid */ - if ((ctx->insns_flags2 & PPC2_BOOKE206) && - (ctx->opcode & 0x03FFF801)) { - gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); - } - /* otherwise interpreted as no-op */ -} - /* icbt */ static void gen_icbt_440(DisasContext *ctx) { @@ -6037,36 +5420,6 @@ static void gen_icbt_440(DisasContext *ctx) */ } -#if defined(TARGET_PPC64) -static void gen_maddld(DisasContext *ctx) -{ - TCGv_i64 t1 = tcg_temp_new_i64(); - - tcg_gen_mul_i64(t1, cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); - tcg_gen_add_i64(cpu_gpr[rD(ctx->opcode)], t1, cpu_gpr[rC(ctx->opcode)]); -} - -/* maddhd maddhdu */ -static void gen_maddhd_maddhdu(DisasContext *ctx) -{ - TCGv_i64 lo = tcg_temp_new_i64(); - TCGv_i64 hi = tcg_temp_new_i64(); - TCGv_i64 t1 = tcg_temp_new_i64(); - - if (Rc(ctx->opcode)) { - tcg_gen_mulu2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); - tcg_gen_movi_i64(t1, 0); - } else { - tcg_gen_muls2_i64(lo, hi, cpu_gpr[rA(ctx->opcode)], - cpu_gpr[rB(ctx->opcode)]); - tcg_gen_sari_i64(t1, cpu_gpr[rC(ctx->opcode)], 63); - } - tcg_gen_add2_i64(t1, cpu_gpr[rD(ctx->opcode)], lo, hi, - cpu_gpr[rC(ctx->opcode)], t1); -} -#endif /* defined(TARGET_PPC64) */ - static void gen_tbegin(DisasContext *ctx) { if (unlikely(!ctx->tm_enabled)) { @@ -6364,6 +5717,10 @@ static bool resolve_PLS_D(DisasContext *ctx, arg_D *d, arg_PLS_D *a) #include "translate/storage-ctrl-impl.c.inc" +#include "translate/misc-impl.c.inc" + +#include "translate/bhrb-impl.c.inc" + /* Handles lfdp */ static void gen_dform39(DisasContext *ctx) { @@ -6424,46 +5781,9 @@ GEN_HANDLER_E(brw, 0x1F, 0x1B, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA310), GEN_HANDLER_E(brh, 0x1F, 0x1B, 0x06, 0x0000F801, PPC_NONE, PPC2_ISA310), #endif GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0xFFFFFFFF, PPC_NONE), -#if defined(TARGET_PPC64) -GEN_HANDLER_E(cmpeqb, 0x1F, 0x00, 0x07, 0x00600000, PPC_NONE, PPC2_ISA300), -#endif -GEN_HANDLER_E(cmpb, 0x1F, 0x1C, 0x0F, 0x00000001, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(cmprb, 0x1F, 0x00, 0x06, 0x00400001, PPC_NONE, PPC2_ISA300), -GEN_HANDLER(isel, 0x1F, 0x0F, 0xFF, 0x00000001, PPC_ISEL), -GEN_HANDLER(mulhw, 0x1F, 0x0B, 0x02, 0x00000400, PPC_INTEGER), -GEN_HANDLER(mulhwu, 0x1F, 0x0B, 0x00, 0x00000400, PPC_INTEGER), -GEN_HANDLER(mullw, 0x1F, 0x0B, 0x07, 0x00000000, PPC_INTEGER), -GEN_HANDLER(mullwo, 0x1F, 0x0B, 0x17, 0x00000000, PPC_INTEGER), -GEN_HANDLER(mulli, 0x07, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -#if defined(TARGET_PPC64) -GEN_HANDLER(mulld, 0x1F, 0x09, 0x07, 0x00000000, PPC_64B), -#endif -GEN_HANDLER(neg, 0x1F, 0x08, 0x03, 0x0000F800, PPC_INTEGER), -GEN_HANDLER(nego, 0x1F, 0x08, 0x13, 0x0000F800, PPC_INTEGER), -GEN_HANDLER2(andi_, "andi.", 0x1C, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -GEN_HANDLER2(andis_, "andis.", 0x1D, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -GEN_HANDLER(cntlzw, 0x1F, 0x1A, 0x00, 0x00000000, PPC_INTEGER), -GEN_HANDLER_E(cnttzw, 0x1F, 0x1A, 0x10, 0x00000000, PPC_NONE, PPC2_ISA300), 
GEN_HANDLER_E(copy, 0x1F, 0x06, 0x18, 0x03C00001, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(cp_abort, 0x1F, 0x06, 0x1A, 0x03FFF801, PPC_NONE, PPC2_ISA300), GEN_HANDLER_E(paste, 0x1F, 0x06, 0x1C, 0x03C00000, PPC_NONE, PPC2_ISA300), -GEN_HANDLER(or, 0x1F, 0x1C, 0x0D, 0x00000000, PPC_INTEGER), -GEN_HANDLER(xor, 0x1F, 0x1C, 0x09, 0x00000000, PPC_INTEGER), -GEN_HANDLER(ori, 0x18, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -GEN_HANDLER(oris, 0x19, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -GEN_HANDLER(xori, 0x1A, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -GEN_HANDLER(xoris, 0x1B, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), -GEN_HANDLER(popcntb, 0x1F, 0x1A, 0x03, 0x0000F801, PPC_POPCNTB), -GEN_HANDLER(popcntw, 0x1F, 0x1A, 0x0b, 0x0000F801, PPC_POPCNTWD), -GEN_HANDLER_E(prtyw, 0x1F, 0x1A, 0x04, 0x0000F801, PPC_NONE, PPC2_ISA205), -#if defined(TARGET_PPC64) -GEN_HANDLER(popcntd, 0x1F, 0x1A, 0x0F, 0x0000F801, PPC_POPCNTWD), -GEN_HANDLER(cntlzd, 0x1F, 0x1A, 0x01, 0x00000000, PPC_64B), -GEN_HANDLER_E(cnttzd, 0x1F, 0x1A, 0x11, 0x00000000, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(darn, 0x1F, 0x13, 0x17, 0x001CF801, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(prtyd, 0x1F, 0x1A, 0x05, 0x0000F801, PPC_NONE, PPC2_ISA205), -GEN_HANDLER_E(bpermd, 0x1F, 0x1C, 0x07, 0x00000001, PPC_NONE, PPC2_PERM_ISA206), -#endif GEN_HANDLER(rlwimi, 0x14, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(rlwinm, 0x15, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), GEN_HANDLER(rlwnm, 0x17, 0xFF, 0xFF, 0x00000000, PPC_INTEGER), @@ -6492,7 +5812,6 @@ GEN_HANDLER(lswi, 0x1F, 0x15, 0x12, 0x00000001, PPC_STRING), GEN_HANDLER(lswx, 0x1F, 0x15, 0x10, 0x00000001, PPC_STRING), GEN_HANDLER(stswi, 0x1F, 0x15, 0x16, 0x00000001, PPC_STRING), GEN_HANDLER(stswx, 0x1F, 0x15, 0x14, 0x00000001, PPC_STRING), -GEN_HANDLER(eieio, 0x1F, 0x16, 0x1A, 0x01FFF801, PPC_MEM_EIEIO), GEN_HANDLER(isync, 0x13, 0x16, 0x04, 0x03FFF801, PPC_MEM), GEN_HANDLER_E(lbarx, 0x1F, 0x14, 0x01, 0, PPC_NONE, PPC2_ATOMIC_ISA206), GEN_HANDLER_E(lharx, 0x1F, 0x14, 0x03, 0, PPC_NONE, PPC2_ATOMIC_ISA206), @@ -6510,7 +5829,6 @@ GEN_HANDLER_E(lqarx, 0x1F, 0x14, 0x08, 0, PPC_NONE, PPC2_LSQ_ISA207), GEN_HANDLER2(stdcx_, "stdcx.", 0x1F, 0x16, 0x06, 0x00000000, PPC_64B), GEN_HANDLER_E(stqcx_, 0x1F, 0x16, 0x05, 0, PPC_NONE, PPC2_LSQ_ISA207), #endif -GEN_HANDLER(sync, 0x1F, 0x16, 0x12, 0x039FF801, PPC_MEM_SYNC), /* ISA v3.0 changed the extended opcode from 62 to 30 */ GEN_HANDLER(wait, 0x1F, 0x1E, 0x01, 0x039FF801, PPC_WAIT), GEN_HANDLER_E(wait, 0x1F, 0x1E, 0x00, 0x039CF801, PPC_NONE, PPC2_ISA300), @@ -6539,12 +5857,6 @@ GEN_HANDLER(hrfid, 0x13, 0x12, 0x08, 0x03FF8001, PPC_64H), /* Top bit of opc2 corresponds with low bit of LEV, so use two handlers */ GEN_HANDLER(sc, 0x11, 0x11, 0xFF, 0x03FFF01D, PPC_FLOW), GEN_HANDLER(sc, 0x11, 0x01, 0xFF, 0x03FFF01D, PPC_FLOW), -GEN_HANDLER(tw, 0x1F, 0x04, 0x00, 0x00000001, PPC_FLOW), -GEN_HANDLER(twi, 0x03, 0xFF, 0xFF, 0x00000000, PPC_FLOW), -#if defined(TARGET_PPC64) -GEN_HANDLER(td, 0x1F, 0x04, 0x02, 0x00000001, PPC_64B), -GEN_HANDLER(tdi, 0x02, 0xFF, 0xFF, 0x00000000, PPC_64B), -#endif GEN_HANDLER(mcrxr, 0x1F, 0x00, 0x10, 0x007FF801, PPC_MISC), GEN_HANDLER(mfcr, 0x1F, 0x13, 0x00, 0x00000801, PPC_MISC), GEN_HANDLER(mfmsr, 0x1F, 0x13, 0x02, 0x001FF801, PPC_MISC), @@ -6633,78 +5945,12 @@ GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x03800001, GEN_HANDLER(wrtee, 0x1F, 0x03, 0x04, 0x000FFC01, PPC_WRTEE), GEN_HANDLER(wrteei, 0x1F, 0x03, 0x05, 0x000E7C01, PPC_WRTEE), GEN_HANDLER(dlmzb, 0x1F, 0x0E, 0x02, 0x00000000, PPC_440_SPEC), -GEN_HANDLER_E(mbar, 0x1F, 0x16, 0x1a, 
0x001FF801, - PPC_BOOKE, PPC2_BOOKE206), -GEN_HANDLER(msync_4xx, 0x1F, 0x16, 0x12, 0x039FF801, PPC_BOOKE), GEN_HANDLER2_E(icbt_440, "icbt", 0x1F, 0x16, 0x00, 0x03E00001, PPC_BOOKE, PPC2_BOOKE206), GEN_HANDLER2(icbt_440, "icbt", 0x1F, 0x06, 0x08, 0x03E00001, PPC_440_SPEC), -GEN_HANDLER(lvsl, 0x1f, 0x06, 0x00, 0x00000001, PPC_ALTIVEC), -GEN_HANDLER(lvsr, 0x1f, 0x06, 0x01, 0x00000001, PPC_ALTIVEC), GEN_HANDLER(mfvscr, 0x04, 0x2, 0x18, 0x001ff800, PPC_ALTIVEC), GEN_HANDLER(mtvscr, 0x04, 0x2, 0x19, 0x03ff0000, PPC_ALTIVEC), -#if defined(TARGET_PPC64) -GEN_HANDLER_E(maddhd_maddhdu, 0x04, 0x18, 0xFF, 0x00000000, PPC_NONE, - PPC2_ISA300), -GEN_HANDLER_E(maddld, 0x04, 0x19, 0xFF, 0x00000000, PPC_NONE, PPC2_ISA300), -#endif - -#undef GEN_INT_ARITH_DIVW -#define GEN_INT_ARITH_DIVW(name, opc3, sign, compute_ov) \ -GEN_HANDLER(name, 0x1F, 0x0B, opc3, 0x00000000, PPC_INTEGER) -GEN_INT_ARITH_DIVW(divwu, 0x0E, 0, 0), -GEN_INT_ARITH_DIVW(divwuo, 0x1E, 0, 1), -GEN_INT_ARITH_DIVW(divw, 0x0F, 1, 0), -GEN_INT_ARITH_DIVW(divwo, 0x1F, 1, 1), -GEN_HANDLER_E(divwe, 0x1F, 0x0B, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(divweo, 0x1F, 0x0B, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(divweu, 0x1F, 0x0B, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(divweuo, 0x1F, 0x0B, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(modsw, 0x1F, 0x0B, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(moduw, 0x1F, 0x0B, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300), - -#if defined(TARGET_PPC64) -#undef GEN_INT_ARITH_DIVD -#define GEN_INT_ARITH_DIVD(name, opc3, sign, compute_ov) \ -GEN_HANDLER(name, 0x1F, 0x09, opc3, 0x00000000, PPC_64B) -GEN_INT_ARITH_DIVD(divdu, 0x0E, 0, 0), -GEN_INT_ARITH_DIVD(divduo, 0x1E, 0, 1), -GEN_INT_ARITH_DIVD(divd, 0x0F, 1, 0), -GEN_INT_ARITH_DIVD(divdo, 0x1F, 1, 1), - -GEN_HANDLER_E(divdeu, 0x1F, 0x09, 0x0C, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(divdeuo, 0x1F, 0x09, 0x1C, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(divde, 0x1F, 0x09, 0x0D, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(divdeo, 0x1F, 0x09, 0x1D, 0, PPC_NONE, PPC2_DIVE_ISA206), -GEN_HANDLER_E(modsd, 0x1F, 0x09, 0x18, 0x00000001, PPC_NONE, PPC2_ISA300), -GEN_HANDLER_E(modud, 0x1F, 0x09, 0x08, 0x00000001, PPC_NONE, PPC2_ISA300), - -#undef GEN_INT_ARITH_MUL_HELPER -#define GEN_INT_ARITH_MUL_HELPER(name, opc3) \ -GEN_HANDLER(name, 0x1F, 0x09, opc3, 0x00000000, PPC_64B) -GEN_INT_ARITH_MUL_HELPER(mulhdu, 0x00), -GEN_INT_ARITH_MUL_HELPER(mulhd, 0x02), -GEN_INT_ARITH_MUL_HELPER(mulldo, 0x17), -#endif - -#undef GEN_LOGICAL1 -#undef GEN_LOGICAL2 -#define GEN_LOGICAL2(name, tcg_op, opc, type) \ -GEN_HANDLER(name, 0x1F, 0x1C, opc, 0x00000000, type) -#define GEN_LOGICAL1(name, tcg_op, opc, type) \ -GEN_HANDLER(name, 0x1F, 0x1A, opc, 0x00000000, type) -GEN_LOGICAL2(and, tcg_gen_and_tl, 0x00, PPC_INTEGER), -GEN_LOGICAL2(andc, tcg_gen_andc_tl, 0x01, PPC_INTEGER), -GEN_LOGICAL2(eqv, tcg_gen_eqv_tl, 0x08, PPC_INTEGER), -GEN_LOGICAL1(extsb, tcg_gen_ext8s_tl, 0x1D, PPC_INTEGER), -GEN_LOGICAL1(extsh, tcg_gen_ext16s_tl, 0x1C, PPC_INTEGER), -GEN_LOGICAL2(nand, tcg_gen_nand_tl, 0x0E, PPC_INTEGER), -GEN_LOGICAL2(nor, tcg_gen_nor_tl, 0x03, PPC_INTEGER), -GEN_LOGICAL2(orc, tcg_gen_orc_tl, 0x0C, PPC_INTEGER), -#if defined(TARGET_PPC64) -GEN_LOGICAL1(extsw, tcg_gen_ext32s_tl, 0x1E, PPC_64B), -#endif #if defined(TARGET_PPC64) #undef GEN_PPC64_R2 @@ -7242,6 +6488,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) #if defined(TARGET_PPC64) ctx->sf_mode = (hflags >> HFLAGS_64) & 1; ctx->has_cfar = 
!!(env->flags & POWERPC_FLAG_CFAR); + ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB); #endif ctx->lazy_tlb_flush = env->mmu_model == POWERPC_MMU_32B || env->mmu_model & POWERPC_MMU_64; @@ -7258,6 +6505,7 @@ static void ppc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->mmcr0_pmcjce = (hflags >> HFLAGS_PMCJCE) & 1; ctx->pmc_other = (hflags >> HFLAGS_PMC_OTHER) & 1; ctx->pmu_insn_cnt = (hflags >> HFLAGS_INSN_CNT) & 1; + ctx->bhrb_enable = (hflags >> HFLAGS_BHRB_ENABLE) & 1; ctx->singlestep_enabled = 0; if ((hflags >> HFLAGS_SE) & 1) { @@ -7405,20 +6653,12 @@ static void ppc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void ppc_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cs, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cs, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps ppc_tr_ops = { .init_disas_context = ppc_tr_init_disas_context, .tb_start = ppc_tr_tb_start, .insn_start = ppc_tr_insn_start, .translate_insn = ppc_tr_translate_insn, .tb_stop = ppc_tr_tb_stop, - .disas_log = ppc_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/ppc/translate/bhrb-impl.c.inc b/target/ppc/translate/bhrb-impl.c.inc new file mode 100644 index 0000000000..3a19bc4555 --- /dev/null +++ b/target/ppc/translate/bhrb-impl.c.inc @@ -0,0 +1,43 @@ +/* + * Power ISA Decode For BHRB Instructions + * + * Copyright IBM Corp. 2023 + * + * Authors: + * Glenn Miles <milesg@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) + +static bool trans_MFBHRBE(DisasContext *ctx, arg_XFX_bhrbe *arg) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + TCGv_i32 bhrbe = tcg_constant_i32(arg->bhrbe); + gen_helper_mfbhrbe(cpu_gpr[arg->rt], tcg_env, bhrbe); + return true; +} + +static bool trans_CLRBHRB(DisasContext *ctx, arg_CLRBHRB *arg) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA207S); + gen_helper_clrbhrb(tcg_env); + return true; +} + +#else + +static bool trans_MFBHRBE(DisasContext *ctx, arg_XFX_bhrbe *arg) +{ + gen_invalid(ctx); + return true; +} + +static bool trans_CLRBHRB(DisasContext *ctx, arg_CLRBHRB *arg) +{ + gen_invalid(ctx); + return true; +} +#endif diff --git a/target/ppc/translate/branch-impl.c.inc b/target/ppc/translate/branch-impl.c.inc index fb0fcf30cc..9ade0c659a 100644 --- a/target/ppc/translate/branch-impl.c.inc +++ b/target/ppc/translate/branch-impl.c.inc @@ -17,7 +17,7 @@ static bool trans_RFEBB(DisasContext *ctx, arg_XL_s *arg) REQUIRE_INSNS_FLAGS2(ctx, ISA207S); translator_io_start(&ctx->base); - gen_update_cfar(ctx, ctx->cia); + gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD); gen_helper_rfebb(tcg_env, cpu_gpr[arg->s]); ctx->base.is_jmp = DISAS_CHAIN; diff --git a/target/ppc/translate/fixedpoint-impl.c.inc b/target/ppc/translate/fixedpoint-impl.c.inc index 0c66465d96..fa0191e866 100644 --- a/target/ppc/translate/fixedpoint-impl.c.inc +++ b/target/ppc/translate/fixedpoint-impl.c.inc @@ -289,6 +289,50 @@ TRANS(CMPL, do_cmp_X, false); TRANS(CMPI, do_cmp_D, true); TRANS(CMPLI, do_cmp_D, false); +static bool trans_CMPRB(DisasContext *ctx, arg_CMPRB *a) +{ + TCGv_i32 src1 = tcg_temp_new_i32(); + TCGv_i32 src2 = tcg_temp_new_i32(); + TCGv_i32 src2lo = tcg_temp_new_i32(); + TCGv_i32 src2hi = tcg_temp_new_i32(); + TCGv_i32 crf = cpu_crf[a->bf]; + + 
REQUIRE_INSNS_FLAGS2(ctx, ISA300); + tcg_gen_trunc_tl_i32(src1, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(src2, cpu_gpr[a->rb]); + + tcg_gen_andi_i32(src1, src1, 0xFF); + tcg_gen_ext8u_i32(src2lo, src2); + tcg_gen_extract_i32(src2hi, src2, 8, 8); + + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(crf, src2lo, src2hi); + + if (a->l) { + tcg_gen_extract_i32(src2lo, src2, 16, 8); + tcg_gen_extract_i32(src2hi, src2, 24, 8); + tcg_gen_setcond_i32(TCG_COND_LEU, src2lo, src2lo, src1); + tcg_gen_setcond_i32(TCG_COND_LEU, src2hi, src1, src2hi); + tcg_gen_and_i32(src2lo, src2lo, src2hi); + tcg_gen_or_i32(crf, crf, src2lo); + } + tcg_gen_shli_i32(crf, crf, CRF_GT_BIT); + return true; +} + +static bool trans_CMPEQB(DisasContext *ctx, arg_CMPEQB *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + gen_helper_CMPEQB(cpu_crf[a->bf], cpu_gpr[a->ra], cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); +#endif + return true; +} + /* * Fixed-Point Arithmetic Instructions */ @@ -395,6 +439,389 @@ TRANS(SUBFE, do_subf_XO, true, true) TRANS(SUBFME, do_subf_const_XO, tcg_constant_tl(-1LL), true, true) TRANS(SUBFZE, do_subf_const_XO, tcg_constant_tl(0), true, true) +static bool trans_MULLI(DisasContext *ctx, arg_MULLI *a) +{ + tcg_gen_muli_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], a->si); + return true; +} + +static bool trans_MULLW(DisasContext *ctx, arg_MULLW *a) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + + tcg_gen_ext32s_tl(t0, cpu_gpr[a->ra]); + tcg_gen_ext32s_tl(t1, cpu_gpr[a->rb]); + tcg_gen_mul_tl(cpu_gpr[a->rt], t0, t1); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} + +static bool trans_MULLWO(DisasContext *ctx, arg_MULLWO *a) +{ + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + +#if defined(TARGET_PPC64) + tcg_gen_ext32s_i64(t0, cpu_gpr[a->ra]); + tcg_gen_ext32s_i64(t1, cpu_gpr[a->rb]); + tcg_gen_mul_i64(cpu_gpr[a->rt], t0, t1); + tcg_gen_sextract_i64(t0, cpu_gpr[a->rt], 31, 1); + tcg_gen_sari_i64(t1, cpu_gpr[a->rt], 32); +#else + tcg_gen_muls2_i32(cpu_gpr[a->rt], t1, cpu_gpr[a->ra], cpu_gpr[a->rb]); + tcg_gen_sari_i32(t0, cpu_gpr[a->rt], 31); +#endif + tcg_gen_setcond_tl(TCG_COND_NE, cpu_ov, t0, t1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); + + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} + +static bool do_mulhw(DisasContext *ctx, arg_XO_tab_rc *a, + void (*helper)(TCGv_i32 rl, TCGv_i32 rh, TCGv_i32 arg1, + TCGv_i32 arg2)) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(t0, cpu_gpr[a->ra]); + tcg_gen_trunc_tl_i32(t1, cpu_gpr[a->rb]); + helper(t0, t1, t0, t1); + tcg_gen_extu_i32_tl(cpu_gpr[a->rt], t1); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} + +TRANS(MULHW, do_mulhw, tcg_gen_muls2_i32) +TRANS(MULHWU, do_mulhw, tcg_gen_mulu2_i32) + +static bool do_divw(DisasContext *ctx, arg_XO *a, int sign) +{ + gen_op_arith_divw(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb], + sign, a->oe, a->rc); + return true; +} + +static bool do_dive(DisasContext *ctx, arg_XO *a, + void (*helper)(TCGv, TCGv_ptr, TCGv, TCGv, TCGv_i32)) +{ + REQUIRE_INSNS_FLAGS2(ctx, DIVE_ISA206); + helper(cpu_gpr[a->rt], tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], + tcg_constant_i32(a->oe)); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} + 
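/*
 * [Editor's note -- not part of the patch.] The decode-tree wrappers above
 * (do_divw, do_dive) now take OE/Rc as explicit booleans instead of peeking
 * at ctx->opcode. For reference, a scalar sketch of the divw overflow rule
 * that gen_op_arith_divw() implements: per the Power ISA, division by zero,
 * or 0x80000000 / -1 for the signed form, overflows, sets OV/OV32 when
 * OE = 1, and leaves the quotient boundedly undefined. The function name is
 * illustrative only.
 */
#include <stdbool.h>
#include <stdint.h>

static bool divw_overflows(int32_t dividend, int32_t divisor, bool sign)
{
    if (divisor == 0) {
        return true;                             /* divide by zero */
    }
    /* Signed INT32_MIN / -1 does not fit in 32 bits. */
    return sign && dividend == INT32_MIN && divisor == -1;
}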
+TRANS(DIVW, do_divw, 1); +TRANS(DIVWU, do_divw, 0); +TRANS(DIVWE, do_dive, gen_helper_DIVWE); +TRANS(DIVWEU, do_dive, gen_helper_DIVWEU); + +static bool do_modw(DisasContext *ctx, arg_X *a, bool sign) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA300); + gen_op_arith_modw(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb], + sign); + return true; +} + +TRANS(MODUW, do_modw, false); +TRANS(MODSW, do_modw, true); + +static bool trans_NEG(DisasContext *ctx, arg_NEG *a) +{ + if (a->oe) { + TCGv zero = tcg_constant_tl(0); + gen_op_arith_subf(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], zero, + false, false, true, a->rc); + } else { + tcg_gen_neg_tl(cpu_gpr[a->rt], cpu_gpr[a->ra]); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + } + return true; +} + +static bool trans_DARN(DisasContext *ctx, arg_DARN *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + if (a->l > 2) { + tcg_gen_movi_i64(cpu_gpr[a->rt], -1); + } else { + translator_io_start(&ctx->base); + if (a->l == 0) { + gen_helper_DARN32(cpu_gpr[a->rt]); + } else { + /* Return 64-bit random for both CRN and RRN */ + gen_helper_DARN64(cpu_gpr[a->rt]); + } + } +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MULLD(DisasContext *ctx, arg_MULLD *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + tcg_gen_mul_tl(cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MULLDO(DisasContext *ctx, arg_MULLD *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + TCGv_i64 t0 = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_muls2_i64(t0, t1, cpu_gpr[a->ra], cpu_gpr[a->rb]); + tcg_gen_mov_i64(cpu_gpr[a->rt], t0); + + tcg_gen_sari_i64(t0, t0, 63); + tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1); + if (is_isa300(ctx)) { + tcg_gen_mov_tl(cpu_ov32, cpu_ov); + } + tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov); + + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool do_mulhd(DisasContext *ctx, arg_XO_tab_rc *a, + void (*helper)(TCGv, TCGv, TCGv, TCGv)) +{ + TCGv lo = tcg_temp_new(); + helper(lo, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb]); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); + } + return true; +} + +TRANS64(MULHD, do_mulhd, tcg_gen_muls2_tl); +TRANS64(MULHDU, do_mulhd, tcg_gen_mulu2_tl); + +static bool trans_MADDLD(DisasContext *ctx, arg_MADDLD *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_mul_i64(t1, cpu_gpr[a->vra], cpu_gpr[a->vrb]); + tcg_gen_add_i64(cpu_gpr[a->vrt], t1, cpu_gpr[a->rc]); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MADDHD(DisasContext *ctx, arg_MADDHD *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + TCGv_i64 lo = tcg_temp_new_i64(); + TCGv_i64 hi = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_muls2_i64(lo, hi, cpu_gpr[a->vra], cpu_gpr[a->vrb]); + tcg_gen_sari_i64(t1, cpu_gpr[a->rc], 63); + tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], t1); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_MADDHDU(DisasContext *ctx, arg_MADDHDU *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + TCGv_i64 lo = 
tcg_temp_new_i64(); + TCGv_i64 hi = tcg_temp_new_i64(); + TCGv_i64 t1 = tcg_temp_new_i64(); + + tcg_gen_mulu2_i64(lo, hi, cpu_gpr[a->vra], cpu_gpr[a->vrb]); + tcg_gen_add2_i64(t1, cpu_gpr[a->vrt], lo, hi, cpu_gpr[a->rc], + tcg_constant_i64(0)); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool do_divd(DisasContext *ctx, arg_XO *a, bool sign) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + gen_op_arith_divd(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb], + sign, a->oe, a->rc); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool do_modd(DisasContext *ctx, arg_X *a, bool sign) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + gen_op_arith_modd(ctx, cpu_gpr[a->rt], cpu_gpr[a->ra], cpu_gpr[a->rb], + sign); +#else + qemu_build_not_reached(); +#endif + return true; +} + +TRANS64(DIVD, do_divd, true); +TRANS64(DIVDU, do_divd, false); + +static bool trans_DIVDE(DisasContext *ctx, arg_DIVDE *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + return do_dive(ctx, a, gen_helper_DIVDE); +#else + qemu_build_not_reached(); +#endif +} + +static bool trans_DIVDEU(DisasContext *ctx, arg_DIVDEU *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + return do_dive(ctx, a, gen_helper_DIVDEU); +#else + qemu_build_not_reached(); +#endif + return true; +} + +TRANS64(MODSD, do_modd, true); +TRANS64(MODUD, do_modd, false); + +/* + * Fixed-Point Select Instructions + */ + +static bool trans_ISEL(DisasContext *ctx, arg_ISEL *a) +{ + REQUIRE_INSNS_FLAGS(ctx, ISEL); + uint32_t bi = a->bc; + uint32_t mask = 0x08 >> (bi & 0x03); + TCGv t0 = tcg_temp_new(); + TCGv zr; + + tcg_gen_extu_i32_tl(t0, cpu_crf[bi >> 2]); + tcg_gen_andi_tl(t0, t0, mask); + + zr = tcg_constant_tl(0); + tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[a->rt], t0, zr, + a->ra ? 
cpu_gpr[a->ra] : zr, + cpu_gpr[a->rb]); + return true; +} + +/* + * Fixed-Point Trap Instructions + */ + +static bool trans_TW(DisasContext *ctx, arg_TW *a) +{ + TCGv_i32 t0; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); + return true; +} + +static bool trans_TWI(DisasContext *ctx, arg_TWI *a) +{ + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TW(tcg_env, cpu_gpr[a->ra], t0, t1); + return true; +} + +static bool trans_TD(DisasContext *ctx, arg_TD *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + TCGv_i32 t0; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], cpu_gpr[a->rb], t0); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_TDI(DisasContext *ctx, arg_TDI *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + TCGv t0; + TCGv_i32 t1; + + if (check_unconditional_trap(ctx, a->rt)) { + return true; + } + t0 = tcg_constant_tl(a->si); + t1 = tcg_constant_i32(a->rt); + gen_helper_TD(tcg_env, cpu_gpr[a->ra], t0, t1); +#else + qemu_build_not_reached(); +#endif + return true; +} + static bool trans_INVALID(DisasContext *ctx, arg_INVALID *a) { gen_invalid(ctx); @@ -429,6 +856,285 @@ TRANS(SETBCR, do_set_bool_cond, false, true) TRANS(SETNBC, do_set_bool_cond, true, false) TRANS(SETNBCR, do_set_bool_cond, true, true) +/* + * Fixed-Point Logical Instructions + */ + +static bool do_addi_(DisasContext *ctx, arg_D_ui *a, bool shift) +{ + tcg_gen_andi_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], shift ? a->ui << 16 : a->ui); + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + return true; +} + +static bool do_ori(DisasContext *ctx, arg_D_ui *a, bool shift) +{ + if (a->rt == a->ra && a->ui == 0) { + /* NOP */ + return true; + } + tcg_gen_ori_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], shift ? a->ui << 16 : a->ui); + return true; +} + +static bool do_xori(DisasContext *ctx, arg_D_ui *a, bool shift) +{ + if (a->rt == a->ra && a->ui == 0) { + /* NOP */ + return true; + } + tcg_gen_xori_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], shift ? a->ui << 16 : a->ui); + return true; +} + +static bool do_logical1(DisasContext *ctx, arg_X_sa_rc *a, + void (*helper)(TCGv, TCGv)) +{ + helper(cpu_gpr[a->ra], cpu_gpr[a->rs]); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + } + return true; +} + +static bool do_logical2(DisasContext *ctx, arg_X_rc *a, + void (*helper)(TCGv, TCGv, TCGv)) +{ + helper(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + } + return true; +} + +static bool trans_OR(DisasContext *ctx, arg_OR *a) +{ + /* Optimisation for mr. 
ri case */ + if (a->rt != a->ra || a->rt != a->rb) { + if (a->rt != a->rb) { + tcg_gen_or_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]); + } else { + tcg_gen_mov_tl(cpu_gpr[a->ra], cpu_gpr[a->rt]); + } + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + } + } else if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->rt]); +#if defined(TARGET_PPC64) + } else if (a->rt != 0) { /* 0 is nop */ + int prio = 0; + + switch (a->rt) { + case 1: + /* Set process priority to low */ + prio = 2; + break; + case 6: + /* Set process priority to medium-low */ + prio = 3; + break; + case 2: + /* Set process priority to normal */ + prio = 4; + break; +#if !defined(CONFIG_USER_ONLY) + case 31: + if (!ctx->pr) { + /* Set process priority to very low */ + prio = 1; + } + break; + case 5: + if (!ctx->pr) { + /* Set process priority to medium-hight */ + prio = 5; + } + break; + case 3: + if (!ctx->pr) { + /* Set process priority to high */ + prio = 6; + } + break; + case 7: + if (ctx->hv && !ctx->pr) { + /* Set process priority to very high */ + prio = 7; + } + break; +#endif + default: + break; + } + if (prio) { + TCGv t0 = tcg_temp_new(); + gen_load_spr(t0, SPR_PPR); + tcg_gen_andi_tl(t0, t0, ~0x001C000000000000ULL); + tcg_gen_ori_tl(t0, t0, ((uint64_t)prio) << 50); + gen_store_spr(SPR_PPR, t0); + } +#if !defined(CONFIG_USER_ONLY) + /* + * Pause out of TCG otherwise spin loops with smt_low eat too + * much CPU and the kernel hangs. This applies to all + * encodings other than no-op, e.g., miso(rs=26), yield(27), + * mdoio(29), mdoom(30), and all currently undefined. + */ + gen_pause(ctx); +#endif +#endif + } + + return true; +} + +static bool trans_XOR(DisasContext *ctx, arg_XOR *a) +{ + /* Optimisation for "set to zero" case */ + if (a->rt != a->rb) { + tcg_gen_xor_tl(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]); + } else { + tcg_gen_movi_tl(cpu_gpr[a->ra], 0); + } + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + } + return true; +} + +static bool trans_CMPB(DisasContext *ctx, arg_CMPB *a) +{ + REQUIRE_INSNS_FLAGS2(ctx, ISA205); + gen_helper_CMPB(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]); + return true; +} + +static bool do_cntzw(DisasContext *ctx, arg_X_sa_rc *a, + void (*helper)(TCGv_i32, TCGv_i32, uint32_t)) +{ + TCGv_i32 t = tcg_temp_new_i32(); + + tcg_gen_trunc_tl_i32(t, cpu_gpr[a->rs]); + helper(t, t, 32); + tcg_gen_extu_i32_tl(cpu_gpr[a->ra], t); + + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + } + return true; +} + +#if defined(TARGET_PPC64) +static bool do_cntzd(DisasContext *ctx, arg_X_sa_rc *a, + void (*helper)(TCGv_i64, TCGv_i64, uint64_t)) +{ + helper(cpu_gpr[a->ra], cpu_gpr[a->rs], 64); + if (unlikely(a->rc)) { + gen_set_Rc0(ctx, cpu_gpr[a->ra]); + } + return true; +} +#endif + +static bool trans_CNTLZD(DisasContext *ctx, arg_CNTLZD *a) +{ + REQUIRE_64BIT(ctx); +#if defined(TARGET_PPC64) + do_cntzd(ctx, a, tcg_gen_clzi_i64); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_CNTTZD(DisasContext *ctx, arg_CNTTZD *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA300); +#if defined(TARGET_PPC64) + do_cntzd(ctx, a, tcg_gen_ctzi_i64); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_POPCNTB(DisasContext *ctx, arg_POPCNTB *a) +{ + REQUIRE_INSNS_FLAGS(ctx, POPCNTB); + gen_helper_POPCNTB(cpu_gpr[a->ra], cpu_gpr[a->rs]); + return true; +} + +static bool trans_POPCNTW(DisasContext *ctx, arg_POPCNTW *a) +{ + REQUIRE_INSNS_FLAGS(ctx, POPCNTWD); +#if defined(TARGET_PPC64) + 
gen_helper_POPCNTW(cpu_gpr[a->ra], cpu_gpr[a->rs]); +#else + tcg_gen_ctpop_i32(cpu_gpr[a->ra], cpu_gpr[a->rs]); +#endif + return true; +} + +static bool trans_POPCNTD(DisasContext *ctx, arg_POPCNTD *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS(ctx, POPCNTWD); +#if defined(TARGET_PPC64) + tcg_gen_ctpop_i64(cpu_gpr[a->ra], cpu_gpr[a->rs]); +#else + qemu_build_not_reached(); +#endif + return true; +} + +static bool trans_PRTYW(DisasContext *ctx, arg_PRTYW *a) +{ + TCGv ra = cpu_gpr[a->ra]; + TCGv rs = cpu_gpr[a->rs]; + TCGv t0 = tcg_temp_new(); + + REQUIRE_INSNS_FLAGS2(ctx, ISA205); + tcg_gen_shri_tl(t0, rs, 16); + tcg_gen_xor_tl(ra, rs, t0); + tcg_gen_shri_tl(t0, ra, 8); + tcg_gen_xor_tl(ra, ra, t0); + tcg_gen_andi_tl(ra, ra, (target_ulong)0x100000001ULL); + return true; +} + +static bool trans_PRTYD(DisasContext *ctx, arg_PRTYD *a) +{ + TCGv ra = cpu_gpr[a->ra]; + TCGv rs = cpu_gpr[a->rs]; + TCGv t0 = tcg_temp_new(); + + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, ISA205); + tcg_gen_shri_tl(t0, rs, 32); + tcg_gen_xor_tl(ra, rs, t0); + tcg_gen_shri_tl(t0, ra, 16); + tcg_gen_xor_tl(ra, ra, t0); + tcg_gen_shri_tl(t0, ra, 8); + tcg_gen_xor_tl(ra, ra, t0); + tcg_gen_andi_tl(ra, ra, 1); + return true; +} + +static bool trans_BPERMD(DisasContext *ctx, arg_BPERMD *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_INSNS_FLAGS2(ctx, PERM_ISA206); +#if defined(TARGET_PPC64) + gen_helper_BPERMD(cpu_gpr[a->ra], cpu_gpr[a->rt], cpu_gpr[a->rb]); +#else + qemu_build_not_reached(); +#endif + return true; +} + static bool trans_CFUGED(DisasContext *ctx, arg_X *a) { REQUIRE_64BIT(ctx); @@ -517,6 +1223,27 @@ static bool trans_PEXTD(DisasContext *ctx, arg_X *a) return true; } +TRANS(ANDI_, do_addi_, false); +TRANS(ANDIS_, do_addi_, true); +TRANS(ORI, do_ori, false); +TRANS(ORIS, do_ori, true); +TRANS(XORI, do_xori, false); +TRANS(XORIS, do_xori, true); + +TRANS(AND, do_logical2, tcg_gen_and_tl); +TRANS(ANDC, do_logical2, tcg_gen_andc_tl); +TRANS(NAND, do_logical2, tcg_gen_nand_tl); +TRANS(ORC, do_logical2, tcg_gen_orc_tl); +TRANS(NOR, do_logical2, tcg_gen_nor_tl); +TRANS(EQV, do_logical2, tcg_gen_eqv_tl); +TRANS(EXTSB, do_logical1, tcg_gen_ext8s_tl); +TRANS(EXTSH, do_logical1, tcg_gen_ext16s_tl); + +TRANS(CNTLZW, do_cntzw, tcg_gen_clzi_i32); +TRANS_FLAGS2(ISA300, CNTTZW, do_cntzw, tcg_gen_ctzi_i32); + +TRANS64(EXTSW, do_logical1, tcg_gen_ext32s_tl); + static bool trans_ADDG6S(DisasContext *ctx, arg_X *a) { const target_ulong carry_bits = (target_ulong)-1 / 0xf; diff --git a/target/ppc/translate/fp-impl.c.inc b/target/ppc/translate/fp-impl.c.inc index 189cd8c979..a66b83398b 100644 --- a/target/ppc/translate/fp-impl.c.inc +++ b/target/ppc/translate/fp-impl.c.inc @@ -30,96 +30,73 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx) #endif /*** Floating-Point arithmetic ***/ -#define _GEN_FLOAT_ACB(name, op1, op2, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - TCGv_i64 t3; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - t2 = tcg_temp_new_i64(); \ - t3 = tcg_temp_new_i64(); \ - gen_reset_fpstatus(); \ - get_fpr(t0, rA(ctx->opcode)); \ - get_fpr(t1, rC(ctx->opcode)); \ - get_fpr(t2, rB(ctx->opcode)); \ - gen_helper_f##name(t3, tcg_env, t0, t1, t2); \ - set_fpr(rD(ctx->opcode), t3); \ - if (set_fprf) { \ - gen_compute_fprf_float64(t3); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ +static 
bool do_helper_acb(DisasContext *ctx, arg_A *a, + void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64, + TCGv_i64, TCGv_i64)) +{ + TCGv_i64 t0, t1, t2, t3; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + t3 = tcg_temp_new_i64(); + gen_reset_fpstatus(); + get_fpr(t0, a->fra); + get_fpr(t1, a->frc); + get_fpr(t2, a->frb); + helper(t3, tcg_env, t0, t1, t2); + set_fpr(a->frt, t3); + gen_compute_fprf_float64(t3); + if (unlikely(a->rc)) { + gen_set_cr1_from_fpscr(ctx); + } + return true; } -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, 0x3F, op2, set_fprf, type); \ -_GEN_FLOAT_ACB(name##s, 0x3B, op2, set_fprf, type); - -#define _GEN_FLOAT_AB(name, op1, op2, inval, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - t2 = tcg_temp_new_i64(); \ - gen_reset_fpstatus(); \ - get_fpr(t0, rA(ctx->opcode)); \ - get_fpr(t1, rB(ctx->opcode)); \ - gen_helper_f##name(t2, tcg_env, t0, t1); \ - set_fpr(rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ +static bool do_helper_ab(DisasContext *ctx, arg_A_tab *a, + void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64, + TCGv_i64)) +{ + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + gen_reset_fpstatus(); + get_fpr(t0, a->fra); + get_fpr(t1, a->frb); + helper(t2, tcg_env, t0, t1); + set_fpr(a->frt, t2); + gen_compute_fprf_float64(t2); + if (unlikely(a->rc)) { + gen_set_cr1_from_fpscr(ctx); + } + return true; } -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, 0x3F, op2, inval, set_fprf, type); \ -_GEN_FLOAT_AB(name##s, 0x3B, op2, inval, set_fprf, type); -#define _GEN_FLOAT_AC(name, op1, op2, inval, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - TCGv_i64 t2; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - t2 = tcg_temp_new_i64(); \ - gen_reset_fpstatus(); \ - get_fpr(t0, rA(ctx->opcode)); \ - get_fpr(t1, rC(ctx->opcode)); \ - gen_helper_f##name(t2, tcg_env, t0, t1); \ - set_fpr(rD(ctx->opcode), t2); \ - if (set_fprf) { \ - gen_compute_fprf_float64(t2); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ +static bool do_helper_ac(DisasContext *ctx, arg_A_tac *a, + void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64, + TCGv_i64)) +{ + TCGv_i64 t0, t1, t2; + REQUIRE_INSNS_FLAGS(ctx, FLOAT); + REQUIRE_FPU(ctx); + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + t2 = tcg_temp_new_i64(); + gen_reset_fpstatus(); + get_fpr(t0, a->fra); + get_fpr(t1, a->frc); + helper(t2, tcg_env, t0, t1); + set_fpr(a->frt, t2); + gen_compute_fprf_float64(t2); + if (unlikely(a->rc)) { + gen_set_cr1_from_fpscr(ctx); + } + return true; } -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, 0x3F, op2, inval, set_fprf, type); \ -_GEN_FLOAT_AC(name##s, 0x3B, op2, inval, set_fprf, type); #define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ static void gen_f##name(DisasContext *ctx) \ @@ -145,64 
+122,22 @@ static void gen_f##name(DisasContext *ctx) \ } \ } -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -static void gen_f##name(DisasContext *ctx) \ -{ \ - TCGv_i64 t0; \ - TCGv_i64 t1; \ - if (unlikely(!ctx->fpu_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_FPU); \ - return; \ - } \ - t0 = tcg_temp_new_i64(); \ - t1 = tcg_temp_new_i64(); \ - gen_reset_fpstatus(); \ - get_fpr(t0, rB(ctx->opcode)); \ - gen_helper_f##name(t1, tcg_env, t0); \ - set_fpr(rD(ctx->opcode), t1); \ - if (set_fprf) { \ - gen_compute_fprf_float64(t1); \ - } \ - if (unlikely(Rc(ctx->opcode) != 0)) { \ - gen_set_cr1_from_fpscr(ctx); \ - } \ -} - -/* fadd - fadds */ -GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT); -/* fdiv - fdivs */ -GEN_FLOAT_AB(div, 0x12, 0x000007C0, 1, PPC_FLOAT); -/* fmul - fmuls */ -GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT); - -/* fre */ -GEN_FLOAT_BS(re, 0x3F, 0x18, 1, PPC_FLOAT_EXT); - -/* fres */ -GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES); - -/* frsqrte */ -GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE); - -/* frsqrtes */ -static void gen_frsqrtes(DisasContext *ctx) +static bool do_helper_bs(DisasContext *ctx, arg_A_tb *a, + void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64)) { - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1; + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); gen_reset_fpstatus(); - get_fpr(t0, rB(ctx->opcode)); - gen_helper_frsqrtes(t1, tcg_env, t0); - set_fpr(rD(ctx->opcode), t1); + get_fpr(t0, a->frb); + helper(t1, tcg_env, t0); + set_fpr(a->frt, t1); gen_compute_fprf_float64(t1); - if (unlikely(Rc(ctx->opcode) != 0)) { + if (unlikely(a->rc)) { gen_set_cr1_from_fpscr(ctx); } + return true; } static bool trans_FSEL(DisasContext *ctx, arg_A *a) @@ -228,10 +163,6 @@ static bool trans_FSEL(DisasContext *ctx, arg_A *a) return true; } -/* fsub - fsubs */ -GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT); -/* Optional: */ - static bool do_helper_fsqrt(DisasContext *ctx, arg_A_tb *a, void (*helper)(TCGv_i64, TCGv_ptr, TCGv_i64)) { @@ -254,19 +185,33 @@ static bool do_helper_fsqrt(DisasContext *ctx, arg_A_tb *a, return true; } +TRANS(FADD, do_helper_ab, gen_helper_FADD); +TRANS(FADDS, do_helper_ab, gen_helper_FADDS); +TRANS(FSUB, do_helper_ab, gen_helper_FSUB); +TRANS(FSUBS, do_helper_ab, gen_helper_FSUBS); +TRANS(FDIV, do_helper_ab, gen_helper_FDIV); +TRANS(FDIVS, do_helper_ab, gen_helper_FDIVS); +TRANS(FMUL, do_helper_ac, gen_helper_FMUL); +TRANS(FMULS, do_helper_ac, gen_helper_FMULS); + +TRANS(FMADD, do_helper_acb, gen_helper_FMADD); +TRANS(FMADDS, do_helper_acb, gen_helper_FMADDS); +TRANS(FMSUB, do_helper_acb, gen_helper_FMSUB); +TRANS(FMSUBS, do_helper_acb, gen_helper_FMSUBS); + +TRANS(FNMADD, do_helper_acb, gen_helper_FNMADD); +TRANS(FNMADDS, do_helper_acb, gen_helper_FNMADDS); +TRANS(FNMSUB, do_helper_acb, gen_helper_FNMSUB); +TRANS(FNMSUBS, do_helper_acb, gen_helper_FNMSUBS); + +TRANS_FLAGS(FLOAT_EXT, FRE, do_helper_bs, gen_helper_FRE); +TRANS_FLAGS(FLOAT_FRES, FRES, do_helper_bs, gen_helper_FRES); +TRANS_FLAGS(FLOAT_FRSQRTE, FRSQRTE, do_helper_bs, gen_helper_FRSQRTE); +TRANS_FLAGS(FLOAT_FRSQRTES, FRSQRTES, do_helper_bs, gen_helper_FRSQRTES); + TRANS(FSQRT, do_helper_fsqrt, gen_helper_FSQRT); TRANS(FSQRTS, do_helper_fsqrt, gen_helper_FSQRTS); -/*** Floating-Point multiply-and-add ***/ -/* fmadd - fmadds */ -GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT); -/* fmsub - fmsubs */ -GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT); -/* fnmadd - fnmadds 
*/ -GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT); -/* fnmsub - fnmsubs */ -GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT); - /*** Floating-Point round & convert ***/ /* fctiw */ GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT); @@ -304,35 +249,30 @@ GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT); /* frim */ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT); -static void gen_ftdiv(DisasContext *ctx) +static bool trans_FTDIV(DisasContext *ctx, arg_X_bf *a) { - TCGv_i64 t0; - TCGv_i64 t1; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + TCGv_i64 t0, t1; + REQUIRE_INSNS_FLAGS2(ctx, FP_TST_ISA206); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); t1 = tcg_temp_new_i64(); - get_fpr(t0, rA(ctx->opcode)); - get_fpr(t1, rB(ctx->opcode)); - gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], t0, t1); + get_fpr(t0, a->ra); + get_fpr(t1, a->rb); + gen_helper_FTDIV(cpu_crf[a->bf], t0, t1); + return true; } -static void gen_ftsqrt(DisasContext *ctx) +static bool trans_FTSQRT(DisasContext *ctx, arg_X_bf_b *a) { TCGv_i64 t0; - if (unlikely(!ctx->fpu_enabled)) { - gen_exception(ctx, POWERPC_EXCP_FPU); - return; - } + REQUIRE_INSNS_FLAGS2(ctx, FP_TST_ISA206); + REQUIRE_FPU(ctx); t0 = tcg_temp_new_i64(); - get_fpr(t0, rB(ctx->opcode)); - gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], t0); + get_fpr(t0, a->rb); + gen_helper_FTSQRT(cpu_crf[a->bf], t0); + return true; } - - /*** Floating-Point compare ***/ /* fcmpo */ @@ -1111,14 +1051,7 @@ TRANS(STFDX, do_lsfp_X, false, true, false) TRANS(STFDUX, do_lsfp_X, true, true, false) TRANS(PSTFD, do_lsfp_PLS_D, false, true, false) -#undef _GEN_FLOAT_ACB -#undef GEN_FLOAT_ACB -#undef _GEN_FLOAT_AB -#undef GEN_FLOAT_AB -#undef _GEN_FLOAT_AC -#undef GEN_FLOAT_AC #undef GEN_FLOAT_B -#undef GEN_FLOAT_BS #undef GEN_LDF #undef GEN_LDUF diff --git a/target/ppc/translate/fp-ops.c.inc b/target/ppc/translate/fp-ops.c.inc index d4c6c4bed1..cef4b5dfcb 100644 --- a/target/ppc/translate/fp-ops.c.inc +++ b/target/ppc/translate/fp-ops.c.inc @@ -1,36 +1,6 @@ -#define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, 0x00000000, type) -#define GEN_FLOAT_ACB(name, op2, set_fprf, type) \ -_GEN_FLOAT_ACB(name, name, 0x3F, op2, 0, set_fprf, type), \ -_GEN_FLOAT_ACB(name##s, name, 0x3B, op2, 1, set_fprf, type) -#define _GEN_FLOAT_AB(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, inval, type) -#define GEN_FLOAT_AB(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type), \ -_GEN_FLOAT_AB(name##s, name, 0x3B, op2, inval, 1, set_fprf, type) -#define _GEN_FLOAT_AC(name, op, op1, op2, inval, isfloat, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, inval, type) -#define GEN_FLOAT_AC(name, op2, inval, set_fprf, type) \ -_GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type), \ -_GEN_FLOAT_AC(name##s, name, 0x3B, op2, inval, 1, set_fprf, type) #define GEN_FLOAT_B(name, op2, op3, set_fprf, type) \ GEN_HANDLER(f##name, 0x3F, op2, op3, 0x001F0000, type) -#define GEN_FLOAT_BS(name, op1, op2, set_fprf, type) \ -GEN_HANDLER(f##name, op1, op2, 0xFF, 0x001F07C0, type) -GEN_FLOAT_AB(add, 0x15, 0x000007C0, 1, PPC_FLOAT), -GEN_FLOAT_AB(div, 0x12, 0x000007C0, 1, PPC_FLOAT), -GEN_FLOAT_AC(mul, 0x19, 0x0000F800, 1, PPC_FLOAT), -GEN_FLOAT_BS(re, 0x3F, 0x18, 1, PPC_FLOAT_EXT), -GEN_FLOAT_BS(res, 0x3B, 0x18, 1, PPC_FLOAT_FRES), -GEN_FLOAT_BS(rsqrte, 0x3F, 0x1A, 1, PPC_FLOAT_FRSQRTE), -GEN_FLOAT_AB(sub, 0x14, 0x000007C0, 1, PPC_FLOAT), 
-GEN_FLOAT_ACB(madd, 0x1D, 1, PPC_FLOAT), -GEN_FLOAT_ACB(msub, 0x1C, 1, PPC_FLOAT), -GEN_FLOAT_ACB(nmadd, 0x1F, 1, PPC_FLOAT), -GEN_FLOAT_ACB(nmsub, 0x1E, 1, PPC_FLOAT), -GEN_HANDLER_E(ftdiv, 0x3F, 0x00, 0x04, 1, PPC_NONE, PPC2_FP_TST_ISA206), -GEN_HANDLER_E(ftsqrt, 0x3F, 0x00, 0x05, 1, PPC_NONE, PPC2_FP_TST_ISA206), GEN_FLOAT_B(ctiw, 0x0E, 0x00, 0, PPC_FLOAT), GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206), GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT), @@ -61,7 +31,6 @@ GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX) GEN_HANDLER_E(stfdepx, 0x1F, 0x1F, 0x16, 0x00000001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER_E(stfdpx, 0x1F, 0x17, 0x1C, 0x00200001, PPC_NONE, PPC2_ISA205), -GEN_HANDLER(frsqrtes, 0x3B, 0x1A, 0xFF, 0x001F07C0, PPC_FLOAT_FRSQRTES), GEN_HANDLER(fcmpo, 0x3F, 0x00, 0x01, 0x00600001, PPC_FLOAT), GEN_HANDLER(fcmpu, 0x3F, 0x00, 0x00, 0x00600001, PPC_FLOAT), GEN_HANDLER(fabs, 0x3F, 0x08, 0x08, 0x001F0000, PPC_FLOAT), diff --git a/target/ppc/translate/misc-impl.c.inc b/target/ppc/translate/misc-impl.c.inc new file mode 100644 index 0000000000..cbf82b1ea0 --- /dev/null +++ b/target/ppc/translate/misc-impl.c.inc @@ -0,0 +1,157 @@ +/* + * Power ISA decode for misc instructions + * + * Copyright (c) 2024, IBM Corporation. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Memory Barrier Instructions + */ + +static bool trans_SYNC(DisasContext *ctx, arg_X_sync *a) +{ + TCGBar bar = TCG_MO_ALL; + uint32_t l = a->l; + uint32_t sc = a->sc; + + /* + * BookE uses the msync mnemonic. This means hwsync, except in the + * 440, where it an execution serialisation point that requires all + * previous storage accesses to have been performed to memory (which + * doesn't matter for TCG). + */ + if (!(ctx->insns_flags & PPC_MEM_SYNC)) { + if (ctx->insns_flags & PPC_BOOKE) { + tcg_gen_mb(bar | TCG_BAR_SC); + return true; + } + + return false; + } + + /* + * In ISA v3.1, the L field grew one bit. Mask that out to ignore it in + * older processors. It also added the SC field, zero this to ignore + * it too. + */ + if (!(ctx->insns_flags2 & PPC2_ISA310)) { + l &= 0x3; + sc = 0; + } + + if (sc) { + /* Store syncs [stsync, stcisync, stncisync]. These ignore L. */ + bar = TCG_MO_ST_ST; + } else { + if (((l == 1) && (ctx->insns_flags2 & PPC2_MEM_LWSYNC)) || (l == 5)) { + /* lwsync, or plwsync on POWER10 and later */ + bar = TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST; + } + + /* + * We may need to check for a pending TLB flush. + * + * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32. + * + * Additionally, this can only happen in kernel mode however so + * check MSR_PR as well. 
+ */ + if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) { + gen_check_tlb_flush(ctx, true); + } + } + + tcg_gen_mb(bar | TCG_BAR_SC); + + return true; +} + +static bool trans_EIEIO(DisasContext *ctx, arg_EIEIO *a) +{ + TCGBar bar = TCG_MO_ALL; + + /* + * BookE uses the mbar instruction instead of eieio, which is basically + * a full hwsync memory barrier, but is not execution synchronising. For + * the purpose of TCG the distinction is not relevant. + */ + if (!(ctx->insns_flags & PPC_MEM_EIEIO)) { + if ((ctx->insns_flags & PPC_BOOKE) || + (ctx->insns_flags2 & PPC2_BOOKE206)) { + tcg_gen_mb(bar | TCG_BAR_SC); + return true; + } + return false; + } + + /* + * eieio has complex semantics. It provides memory ordering between + * operations in the set: + * - loads from CI memory. + * - stores to CI memory. + * - stores to WT memory. + * + * It separately also orders memory for operations in the set: + * - stores to cacheable memory. + * + * It also serializes instructions: + * - dcbt and dcbst. + * + * It separately serializes: + * - tlbie and tlbsync. + * + * And separately serializes: + * - slbieg, slbiag, and slbsync. + * + * The end result is that CI memory ordering requires TCG_MO_ALL + * and it is not possible to special-case more relaxed ordering for + * cacheable accesses. TCG_BAR_SC is required to provide this + * serialization. + */ + + /* + * POWER9 has an eieio instruction variant using bit 6 as a hint to + * tell the CPU it is a store-forwarding barrier. + */ + if (ctx->opcode & 0x2000000) { + /* + * ISA says that "Reserved fields in instructions are ignored + * by the processor". So ignore bit 6 on non-POWER9 CPUs but + * as this is not an instruction software should be using, + * complain to the user. + */ + if (!(ctx->insns_flags2 & PPC2_ISA300)) { + qemu_log_mask(LOG_GUEST_ERROR, "invalid eieio using bit 6 at @" + TARGET_FMT_lx "\n", ctx->cia); + } else { + bar = TCG_MO_ST_LD; + } + } + + tcg_gen_mb(bar | TCG_BAR_SC); + + return true; +} + +static bool trans_ATTN(DisasContext *ctx, arg_ATTN *a) +{ +#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY) + gen_helper_attn(tcg_env); + return true; +#else + return false; +#endif +} diff --git a/target/ppc/translate/processor-ctrl-impl.c.inc b/target/ppc/translate/processor-ctrl-impl.c.inc index 0142801985..8abbb89630 100644 --- a/target/ppc/translate/processor-ctrl-impl.c.inc +++ b/target/ppc/translate/processor-ctrl-impl.c.inc @@ -59,7 +59,7 @@ static bool trans_MSGSND(DisasContext *ctx, arg_X_rb *a) #if !defined(CONFIG_USER_ONLY) if (is_book3s_arch2x(ctx)) { - gen_helper_book3s_msgsnd(cpu_gpr[a->rb]); + gen_helper_book3s_msgsnd(tcg_env, cpu_gpr[a->rb]); } else { gen_helper_msgsnd(cpu_gpr[a->rb]); } diff --git a/target/ppc/translate/storage-ctrl-impl.c.inc b/target/ppc/translate/storage-ctrl-impl.c.inc index 74c23a4191..b8b4454663 100644 --- a/target/ppc/translate/storage-ctrl-impl.c.inc +++ b/target/ppc/translate/storage-ctrl-impl.c.inc @@ -224,6 +224,13 @@ static bool do_tlbie(DisasContext *ctx, arg_X_tlbie *a, bool local) a->prs << TLBIE_F_PRS_SHIFT | a->r << TLBIE_F_R_SHIFT | local << TLBIE_F_LOCAL_SHIFT)); + if (!local) { + /* + * Global TLB flush uses async-work which must run before the + * next instruction, so this must be the last in the TB. 
+ */ + ctx->base.is_jmp = DISAS_EXIT_UPDATE; + } return true; #endif diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc index b56e615c24..8084af75cc 100644 --- a/target/ppc/translate/vmx-impl.c.inc +++ b/target/ppc/translate/vmx-impl.c.inc @@ -14,125 +14,88 @@ static inline TCGv_ptr gen_avr_ptr(int reg) return r; } -#define GEN_VR_LDX(name, opc2, opc3) \ -static void glue(gen_, name)(DisasContext *ctx) \ -{ \ - TCGv EA; \ - TCGv_i64 avr; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - avr = tcg_temp_new_i64(); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - tcg_gen_andi_tl(EA, EA, ~0xf); \ - /* \ - * We only need to swap high and low halves. gen_qemu_ld64_i64 \ - * does necessary 64-bit byteswap already. \ - */ \ - if (ctx->le_mode) { \ - gen_qemu_ld64_i64(ctx, avr, EA); \ - set_avr64(rD(ctx->opcode), avr, false); \ - tcg_gen_addi_tl(EA, EA, 8); \ - gen_qemu_ld64_i64(ctx, avr, EA); \ - set_avr64(rD(ctx->opcode), avr, true); \ - } else { \ - gen_qemu_ld64_i64(ctx, avr, EA); \ - set_avr64(rD(ctx->opcode), avr, true); \ - tcg_gen_addi_tl(EA, EA, 8); \ - gen_qemu_ld64_i64(ctx, avr, EA); \ - set_avr64(rD(ctx->opcode), avr, false); \ - } \ -} - -#define GEN_VR_STX(name, opc2, opc3) \ -static void gen_st##name(DisasContext *ctx) \ -{ \ - TCGv EA; \ - TCGv_i64 avr; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - avr = tcg_temp_new_i64(); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - tcg_gen_andi_tl(EA, EA, ~0xf); \ - /* \ - * We only need to swap high and low halves. gen_qemu_st64_i64 \ - * does necessary 64-bit byteswap already. 
\ - */ \ - if (ctx->le_mode) { \ - get_avr64(avr, rD(ctx->opcode), false); \ - gen_qemu_st64_i64(ctx, avr, EA); \ - tcg_gen_addi_tl(EA, EA, 8); \ - get_avr64(avr, rD(ctx->opcode), true); \ - gen_qemu_st64_i64(ctx, avr, EA); \ - } else { \ - get_avr64(avr, rD(ctx->opcode), true); \ - gen_qemu_st64_i64(ctx, avr, EA); \ - tcg_gen_addi_tl(EA, EA, 8); \ - get_avr64(avr, rD(ctx->opcode), false); \ - gen_qemu_st64_i64(ctx, avr, EA); \ - } \ -} - -#define GEN_VR_LVE(name, opc2, opc3, size) \ -static void gen_lve##name(DisasContext *ctx) \ - { \ - TCGv EA; \ - TCGv_ptr rs; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - if (size > 1) { \ - tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ - } \ - rs = gen_avr_ptr(rS(ctx->opcode)); \ - gen_helper_lve##name(tcg_env, rs, EA); \ - } - -#define GEN_VR_STVE(name, opc2, opc3, size) \ -static void gen_stve##name(DisasContext *ctx) \ - { \ - TCGv EA; \ - TCGv_ptr rs; \ - if (unlikely(!ctx->altivec_enabled)) { \ - gen_exception(ctx, POWERPC_EXCP_VPU); \ - return; \ - } \ - gen_set_access_type(ctx, ACCESS_INT); \ - EA = tcg_temp_new(); \ - gen_addr_reg_index(ctx, EA); \ - if (size > 1) { \ - tcg_gen_andi_tl(EA, EA, ~(size - 1)); \ - } \ - rs = gen_avr_ptr(rS(ctx->opcode)); \ - gen_helper_stve##name(tcg_env, rs, EA); \ - } +static bool trans_LVX(DisasContext *ctx, arg_X *a) +{ + TCGv EA; + TCGv_i64 avr; + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + gen_set_access_type(ctx, ACCESS_INT); + avr = tcg_temp_new_i64(); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); + tcg_gen_andi_tl(EA, EA, ~0xf); + /* + * We only need to swap high and low halves. gen_qemu_ld64_i64 + * does necessary 64-bit byteswap already. + */ + gen_qemu_ld64_i64(ctx, avr, EA); + set_avr64(a->rt, avr, !ctx->le_mode); + tcg_gen_addi_tl(EA, EA, 8); + gen_qemu_ld64_i64(ctx, avr, EA); + set_avr64(a->rt, avr, ctx->le_mode); + return true; +} -GEN_VR_LDX(lvx, 0x07, 0x03); /* As we don't emulate the cache, lvxl is strictly equivalent to lvx */ -GEN_VR_LDX(lvxl, 0x07, 0x0B); +QEMU_FLATTEN +static bool trans_LVXL(DisasContext *ctx, arg_LVXL *a) +{ + return trans_LVX(ctx, a); +} -GEN_VR_LVE(bx, 0x07, 0x00, 1); -GEN_VR_LVE(hx, 0x07, 0x01, 2); -GEN_VR_LVE(wx, 0x07, 0x02, 4); +static bool trans_STVX(DisasContext *ctx, arg_STVX *a) +{ + TCGv EA; + TCGv_i64 avr; + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + gen_set_access_type(ctx, ACCESS_INT); + avr = tcg_temp_new_i64(); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); + tcg_gen_andi_tl(EA, EA, ~0xf); + /* + * We only need to swap high and low halves. gen_qemu_st64_i64 + * does necessary 64-bit byteswap already. 
+ */ + get_avr64(avr, a->rt, !ctx->le_mode); + gen_qemu_st64_i64(ctx, avr, EA); + tcg_gen_addi_tl(EA, EA, 8); + get_avr64(avr, a->rt, ctx->le_mode); + gen_qemu_st64_i64(ctx, avr, EA); + return true; +} -GEN_VR_STX(svx, 0x07, 0x07); /* As we don't emulate the cache, stvxl is strictly equivalent to stvx */ -GEN_VR_STX(svxl, 0x07, 0x0F); +QEMU_FLATTEN +static bool trans_STVXL(DisasContext *ctx, arg_STVXL *a) +{ + return trans_STVX(ctx, a); +} + +static bool do_ldst_ve_X(DisasContext *ctx, arg_X *a, int size, + void (*helper)(TCGv_env, TCGv_ptr, TCGv)) +{ + TCGv EA; + TCGv_ptr vrt; + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + gen_set_access_type(ctx, ACCESS_INT); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); + if (size > 1) { + tcg_gen_andi_tl(EA, EA, ~(size - 1)); + } + vrt = gen_avr_ptr(a->rt); + helper(tcg_env, vrt, EA); + return true; +} -GEN_VR_STVE(bx, 0x07, 0x04, 1); -GEN_VR_STVE(hx, 0x07, 0x05, 2); -GEN_VR_STVE(wx, 0x07, 0x06, 4); +TRANS(LVEBX, do_ldst_ve_X, 1, gen_helper_LVEBX); +TRANS(LVEHX, do_ldst_ve_X, 2, gen_helper_LVEHX); +TRANS(LVEWX, do_ldst_ve_X, 4, gen_helper_LVEWX); + +TRANS(STVEBX, do_ldst_ve_X, 1, gen_helper_STVEBX); +TRANS(STVEHX, do_ldst_ve_X, 2, gen_helper_STVEHX); +TRANS(STVEWX, do_ldst_ve_X, 4, gen_helper_STVEWX); static void gen_mfvscr(DisasContext *ctx) { @@ -242,16 +205,6 @@ static void glue(gen_, name)(DisasContext *ctx) \ 16, 16); \ } -/* Logical operations */ -GEN_VXFORM_V(vand, MO_64, tcg_gen_gvec_and, 2, 16); -GEN_VXFORM_V(vandc, MO_64, tcg_gen_gvec_andc, 2, 17); -GEN_VXFORM_V(vor, MO_64, tcg_gen_gvec_or, 2, 18); -GEN_VXFORM_V(vxor, MO_64, tcg_gen_gvec_xor, 2, 19); -GEN_VXFORM_V(vnor, MO_64, tcg_gen_gvec_nor, 2, 20); -GEN_VXFORM_V(veqv, MO_64, tcg_gen_gvec_eqv, 2, 26); -GEN_VXFORM_V(vnand, MO_64, tcg_gen_gvec_nand, 2, 22); -GEN_VXFORM_V(vorc, MO_64, tcg_gen_gvec_orc, 2, 21); - #define GEN_VXFORM(name, opc2, opc3) \ static void glue(gen_, name)(DisasContext *ctx) \ { \ @@ -389,22 +342,6 @@ GEN_VXFORM_V(vsububm, MO_8, tcg_gen_gvec_sub, 0, 16); GEN_VXFORM_V(vsubuhm, MO_16, tcg_gen_gvec_sub, 0, 17); GEN_VXFORM_V(vsubuwm, MO_32, tcg_gen_gvec_sub, 0, 18); GEN_VXFORM_V(vsubudm, MO_64, tcg_gen_gvec_sub, 0, 19); -GEN_VXFORM_V(vmaxub, MO_8, tcg_gen_gvec_umax, 1, 0); -GEN_VXFORM_V(vmaxuh, MO_16, tcg_gen_gvec_umax, 1, 1); -GEN_VXFORM_V(vmaxuw, MO_32, tcg_gen_gvec_umax, 1, 2); -GEN_VXFORM_V(vmaxud, MO_64, tcg_gen_gvec_umax, 1, 3); -GEN_VXFORM_V(vmaxsb, MO_8, tcg_gen_gvec_smax, 1, 4); -GEN_VXFORM_V(vmaxsh, MO_16, tcg_gen_gvec_smax, 1, 5); -GEN_VXFORM_V(vmaxsw, MO_32, tcg_gen_gvec_smax, 1, 6); -GEN_VXFORM_V(vmaxsd, MO_64, tcg_gen_gvec_smax, 1, 7); -GEN_VXFORM_V(vminub, MO_8, tcg_gen_gvec_umin, 1, 8); -GEN_VXFORM_V(vminuh, MO_16, tcg_gen_gvec_umin, 1, 9); -GEN_VXFORM_V(vminuw, MO_32, tcg_gen_gvec_umin, 1, 10); -GEN_VXFORM_V(vminud, MO_64, tcg_gen_gvec_umin, 1, 11); -GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12); -GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13); -GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14); -GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15); GEN_VXFORM(vmrghb, 6, 0); GEN_VXFORM(vmrghh, 6, 1); GEN_VXFORM(vmrghw, 6, 2); @@ -460,15 +397,17 @@ static void trans_vmrgow(DisasContext *ctx) * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F. * Bytes sh:sh+15 of X are placed into vD. 
*/ -static void trans_lvsl(DisasContext *ctx) +static bool trans_LVSL(DisasContext *ctx, arg_LVSL *a) { - int VT = rD(ctx->opcode); TCGv_i64 result = tcg_temp_new_i64(); TCGv_i64 sh = tcg_temp_new_i64(); TCGv EA = tcg_temp_new(); + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); + /* Get sh(from description) by anding EA with 0xf. */ - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_extu_tl_i64(sh, EA); tcg_gen_andi_i64(sh, sh, 0xfULL); @@ -478,13 +417,14 @@ static void trans_lvsl(DisasContext *ctx) */ tcg_gen_muli_i64(sh, sh, 0x0101010101010101ULL); tcg_gen_addi_i64(result, sh, 0x0001020304050607ull); - set_avr64(VT, result, true); + set_avr64(a->rt, result, true); /* * Create bytes sh+8:sh+15 of X(from description) and place them in * lower doubleword of vD. */ tcg_gen_addi_i64(result, sh, 0x08090a0b0c0d0e0fULL); - set_avr64(VT, result, false); + set_avr64(a->rt, result, false); + return true; } /* @@ -494,16 +434,17 @@ static void trans_lvsl(DisasContext *ctx) * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F. * Bytes (16-sh):(31-sh) of X are placed into vD. */ -static void trans_lvsr(DisasContext *ctx) +static bool trans_LVSR(DisasContext *ctx, arg_LVSR *a) { - int VT = rD(ctx->opcode); TCGv_i64 result = tcg_temp_new_i64(); TCGv_i64 sh = tcg_temp_new_i64(); TCGv EA = tcg_temp_new(); + REQUIRE_INSNS_FLAGS(ctx, ALTIVEC); + REQUIRE_VECTOR(ctx); /* Get sh(from description) by anding EA with 0xf. */ - gen_addr_reg_index(ctx, EA); + EA = do_ea_calc(ctx, a->ra, cpu_gpr[a->rb]); tcg_gen_extu_tl_i64(sh, EA); tcg_gen_andi_i64(sh, sh, 0xfULL); @@ -513,13 +454,14 @@ static void trans_lvsr(DisasContext *ctx) */ tcg_gen_muli_i64(sh, sh, 0x0101010101010101ULL); tcg_gen_subfi_i64(result, 0x1011121314151617ULL, sh); - set_avr64(VT, result, true); + set_avr64(a->rt, result, true); /* * Create bytes (24-sh):(32-sh) of X(from description) and place them in * lower doubleword of vD. 
*/ tcg_gen_subfi_i64(result, 0x18191a1b1c1d1e1fULL, sh); - set_avr64(VT, result, false); + set_avr64(a->rt, result, false); + return true; } /* @@ -759,6 +701,37 @@ TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv) TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv) TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv) +/* Logical operations */ +TRANS_FLAGS(ALTIVEC, VAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_and); +TRANS_FLAGS(ALTIVEC, VANDC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_andc); +TRANS_FLAGS(ALTIVEC, VOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_or); +TRANS_FLAGS(ALTIVEC, VXOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_xor); +TRANS_FLAGS(ALTIVEC, VNOR, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nor); +TRANS_FLAGS2(ALTIVEC_207, VEQV, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_eqv); +TRANS_FLAGS2(ALTIVEC_207, VNAND, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_nand); +TRANS_FLAGS2(ALTIVEC_207, VORC, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_orc); + +/* Integer Max/Min operations */ +TRANS_FLAGS(ALTIVEC, VMAXUB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_umax); +TRANS_FLAGS(ALTIVEC, VMAXUH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_umax); +TRANS_FLAGS(ALTIVEC, VMAXUW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_umax); +TRANS_FLAGS2(ALTIVEC_207, VMAXUD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_umax); + +TRANS_FLAGS(ALTIVEC, VMAXSB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_smax); +TRANS_FLAGS(ALTIVEC, VMAXSH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_smax); +TRANS_FLAGS(ALTIVEC, VMAXSW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_smax); +TRANS_FLAGS2(ALTIVEC_207, VMAXSD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_smax); + +TRANS_FLAGS(ALTIVEC, VMINUB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_umin); +TRANS_FLAGS(ALTIVEC, VMINUH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_umin); +TRANS_FLAGS(ALTIVEC, VMINUW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_umin); +TRANS_FLAGS2(ALTIVEC_207, VMINUD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_umin); + +TRANS_FLAGS(ALTIVEC, VMINSB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_smin); +TRANS_FLAGS(ALTIVEC, VMINSH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_smin); +TRANS_FLAGS(ALTIVEC, VMINSW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_smin); +TRANS_FLAGS2(ALTIVEC_207, VMINSD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_smin); + static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb) { TCGv_vec t0 = tcg_temp_new_vec_matching(vrb), @@ -1158,8 +1131,6 @@ GEN_VXFORM_TRANS_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207, GEN_VXFORM_HETRO(vextubrx, 6, 28) GEN_VXFORM_HETRO(vextuhrx, 6, 29) GEN_VXFORM_HETRO(vextuwrx, 6, 30) -GEN_VXFORM_TRANS(lvsl, 6, 31) -GEN_VXFORM_TRANS(lvsr, 6, 32) GEN_VXFORM_TRANS_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, vextuwrx, PPC_NONE, PPC2_ISA300) @@ -3365,13 +3336,6 @@ TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ) #undef DIVS64 #undef DIVU64 -#undef GEN_VR_LDX -#undef GEN_VR_STX -#undef GEN_VR_LVE -#undef GEN_VR_STVE - -#undef GEN_VX_LOGICAL -#undef GEN_VX_LOGICAL_207 #undef GEN_VXFORM #undef GEN_VXFORM_207 #undef GEN_VXFORM_DUAL diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc index 33fec8aca4..7bb11b0549 100644 --- a/target/ppc/translate/vmx-ops.c.inc +++ b/target/ppc/translate/vmx-ops.c.inc @@ -1,37 +1,3 @@ -#define GEN_VR_LDX(name, opc2, opc3) \ -GEN_HANDLER(name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -#define GEN_VR_STX(name, opc2, opc3) \ -GEN_HANDLER(st##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -#define GEN_VR_LVE(name, opc2, opc3) \ - 
GEN_HANDLER(lve##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -#define GEN_VR_STVE(name, opc2, opc3) \ - GEN_HANDLER(stve##name, 0x1F, opc2, opc3, 0x00000001, PPC_ALTIVEC) -GEN_VR_LDX(lvx, 0x07, 0x03), -GEN_VR_LDX(lvxl, 0x07, 0x0B), -GEN_VR_LVE(bx, 0x07, 0x00), -GEN_VR_LVE(hx, 0x07, 0x01), -GEN_VR_LVE(wx, 0x07, 0x02), -GEN_VR_STX(svx, 0x07, 0x07), -GEN_VR_STX(svxl, 0x07, 0x0F), -GEN_VR_STVE(bx, 0x07, 0x04), -GEN_VR_STVE(hx, 0x07, 0x05), -GEN_VR_STVE(wx, 0x07, 0x06), - -#define GEN_VX_LOGICAL(name, tcg_op, opc2, opc3) \ -GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) - -#define GEN_VX_LOGICAL_207(name, tcg_op, opc2, opc3) \ -GEN_HANDLER_E(name, 0x04, opc2, opc3, 0x00000000, PPC_NONE, PPC2_ALTIVEC_207) - -GEN_VX_LOGICAL(vand, tcg_gen_and_i64, 2, 16), -GEN_VX_LOGICAL(vandc, tcg_gen_andc_i64, 2, 17), -GEN_VX_LOGICAL(vor, tcg_gen_or_i64, 2, 18), -GEN_VX_LOGICAL(vxor, tcg_gen_xor_i64, 2, 19), -GEN_VX_LOGICAL(vnor, tcg_gen_nor_i64, 2, 20), -GEN_VX_LOGICAL_207(veqv, tcg_gen_eqv_i64, 2, 26), -GEN_VX_LOGICAL_207(vnand, tcg_gen_nand_i64, 2, 22), -GEN_VX_LOGICAL_207(vorc, tcg_gen_orc_i64, 2, 21), - #define GEN_VXFORM(name, opc2, opc3) \ GEN_HANDLER(name, 0x04, opc2, opc3, 0x00000000, PPC_ALTIVEC) @@ -67,22 +33,6 @@ GEN_VXFORM_DUAL(vsubuhm, bcdsub, 0, 17, PPC_ALTIVEC, PPC_NONE), GEN_VXFORM_DUAL(vsubuwm, bcdus, 0, 18, PPC_ALTIVEC, PPC2_ISA300), GEN_VXFORM_DUAL(vsubudm, bcds, 0, 19, PPC2_ALTIVEC_207, PPC2_ISA300), GEN_VXFORM_300(bcds, 0, 27), -GEN_VXFORM(vmaxub, 1, 0), -GEN_VXFORM(vmaxuh, 1, 1), -GEN_VXFORM(vmaxuw, 1, 2), -GEN_VXFORM_207(vmaxud, 1, 3), -GEN_VXFORM(vmaxsb, 1, 4), -GEN_VXFORM(vmaxsh, 1, 5), -GEN_VXFORM(vmaxsw, 1, 6), -GEN_VXFORM_207(vmaxsd, 1, 7), -GEN_VXFORM(vminub, 1, 8), -GEN_VXFORM(vminuh, 1, 9), -GEN_VXFORM(vminuw, 1, 10), -GEN_VXFORM_207(vminud, 1, 11), -GEN_VXFORM(vminsb, 1, 12), -GEN_VXFORM(vminsh, 1, 13), -GEN_VXFORM(vminsw, 1, 14), -GEN_VXFORM_207(vminsd, 1, 15), GEN_VXFORM(vmrghb, 6, 0), GEN_VXFORM(vmrghh, 6, 1), GEN_VXFORM(vmrghw, 6, 2), diff --git a/target/riscv/Kconfig b/target/riscv/Kconfig index adb7de3f37..5f30df22f2 100644 --- a/target/riscv/Kconfig +++ b/target/riscv/Kconfig @@ -1,7 +1,9 @@ config RISCV32 bool select ARM_COMPATIBLE_SEMIHOSTING # for do_common_semihosting() + select DEVICE_TREE # needed by boot.c config RISCV64 bool select ARM_COMPATIBLE_SEMIHOSTING # for do_common_semihosting() + select DEVICE_TREE # needed by boot.c diff --git a/target/riscv/cpu-param.h b/target/riscv/cpu-param.h index b2a9396dec..1fbd64939d 100644 --- a/target/riscv/cpu-param.h +++ b/target/riscv/cpu-param.h @@ -28,4 +28,6 @@ * - M mode HLV/HLVX/HSV 0b111 */ +#define TCG_GUEST_DEFAULT_MO 0 + #endif diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 3b1a02b944..2d0c02c35b 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -43,8 +43,6 @@ typedef struct CPUArchState CPURISCVState; # define TYPE_RISCV_CPU_BASE TYPE_RISCV_CPU_BASE64 #endif -#define TCG_GUEST_DEFAULT_MO 0 - /* * RISC-V-specific extra insn start words: * 1: Original instruction opcode diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index fc090d729a..8ad546a45a 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -24,6 +24,7 @@ #include "internals.h" #include "pmu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "instmap.h" #include "tcg/tcg-op.h" #include "trace.h" diff --git a/target/riscv/debug.h b/target/riscv/debug.h index 5794aa6ee5..c347863578 100644 --- a/target/riscv/debug.h +++ b/target/riscv/debug.h @@ -22,6 +22,8 @@ 
#ifndef RISCV_DEBUG_H #define RISCV_DEBUG_H +#include "exec/breakpoint.h" + #define RV_MAX_TRIGGERS 2 /* register index of tdata CSRs */ diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 49d2f3ad58..eaa36121c7 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1054,8 +1054,8 @@ static void kvm_riscv_read_vlenb(RISCVCPU *cpu, KVMScratchCPU *kvmcpu, ret = ioctl(kvmcpu->cpufd, KVM_GET_ONE_REG, ®); if (ret != 0) { - error_report("Unable to read vlenb register, error code: %s", - strerrorname_np(errno)); + error_report("Unable to read vlenb register, error code: %d", + errno); exit(EXIT_FAILURE); } diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 2a76b611a0..9eea397e72 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -25,6 +25,7 @@ #include "cpu.h" #include "trace.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" static bool pmp_write_cfg(CPURISCVState *env, uint32_t addr_index, uint8_t val); diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index b5b95e052d..40054a391a 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -96,7 +96,7 @@ static void riscv_cpu_synchronize_from_tb(CPUState *cs, CPURISCVState *env = &cpu->env; RISCVMXL xl = FIELD_EX32(tb->flags, TB_FLAGS, XL); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); if (xl == MXL_RV32) { env->pc = (int32_t) tb->pc; @@ -890,7 +890,7 @@ static bool riscv_tcg_cpu_realize(CPUState *cs, Error **errp) CPURISCVState *env = &cpu->env; Error *local_err = NULL; - CPU(cs)->tcg_cflags |= CF_PCREL; + tcg_cflags_set(CPU(cs), CF_PCREL); if (cpu->cfg.ext_sstc) { riscv_timer_init(cpu); diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 9ff09ebdb6..2c27fd4ce1 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -20,8 +20,6 @@ #include "qemu/log.h" #include "cpu.h" #include "tcg/tcg-op.h" -#include "disas/disas.h" -#include "exec/cpu_ldst.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" @@ -1083,7 +1081,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) CPUState *cpu = ctx->cs; CPURISCVState *env = cpu_env(cpu); - return cpu_ldl_code(env, pc); + return translator_ldl(env, &ctx->base, pc); } /* Include insn module translation function */ @@ -1244,7 +1242,8 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) unsigned page_ofs = ctx->base.pc_next & ~TARGET_PAGE_MASK; if (page_ofs > TARGET_PAGE_SIZE - MAX_INSN_LEN) { - uint16_t next_insn = cpu_lduw_code(env, ctx->base.pc_next); + uint16_t next_insn = + translator_lduw(env, &ctx->base, ctx->base.pc_next); int len = insn_len(next_insn); if (!is_same_page(&ctx->base, ctx->base.pc_next + len - 1)) { @@ -1270,29 +1269,12 @@ static void riscv_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void riscv_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ -#ifndef CONFIG_USER_ONLY - RISCVCPU *rvcpu = RISCV_CPU(cpu); - CPURISCVState *env = &rvcpu->env; -#endif - - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); -#ifndef CONFIG_USER_ONLY - fprintf(logfile, "Priv: "TARGET_FMT_ld"; Virt: %d\n", - env->priv, env->virt_enabled); -#endif - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps riscv_tr_ops = { .init_disas_context = riscv_tr_init_disas_context, .tb_start = riscv_tr_tb_start, .insn_start = riscv_tr_insn_start, 
.translate_insn = riscv_tr_translate_insn, .tb_stop = riscv_tr_tb_stop, - .disas_log = riscv_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index fa139040f8..1b4d5a8e37 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -23,6 +23,7 @@ #include "exec/memop.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "exec/page-protection.h" #include "exec/helper-proto.h" #include "fpu/softfloat.h" #include "tcg/tcg-gvec-desc.h" diff --git a/target/rx/cpu.c b/target/rx/cpu.c index e3dfb09722..8a584f0a11 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -22,6 +22,7 @@ #include "cpu.h" #include "migration/vmstate.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "hw/loader.h" #include "fpu/softfloat.h" #include "tcg/debug-assert.h" @@ -45,7 +46,7 @@ static void rx_cpu_synchronize_from_tb(CPUState *cs, { RXCPU *cpu = RX_CPU(cs); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu->env.pc = tb->pc; } diff --git a/target/rx/translate.c b/target/rx/translate.c index f6e9e0ec90..9b81cf20b3 100644 --- a/target/rx/translate.c +++ b/target/rx/translate.c @@ -22,7 +22,6 @@ #include "cpu.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" -#include "exec/cpu_ldst.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" #include "exec/translator.h" @@ -75,10 +74,10 @@ static TCGv_i64 cpu_acc; /* decoder helper */ static uint32_t decode_load_bytes(DisasContext *ctx, uint32_t insn, - int i, int n) + int i, int n) { while (++i <= n) { - uint8_t b = cpu_ldub_code(ctx->env, ctx->base.pc_next++); + uint8_t b = translator_ldub(ctx->env, &ctx->base, ctx->base.pc_next++); insn |= b << (32 - i * 8); } return insn; @@ -90,22 +89,24 @@ static uint32_t li(DisasContext *ctx, int sz) CPURXState *env = ctx->env; addr = ctx->base.pc_next; - tcg_debug_assert(sz < 4); switch (sz) { case 1: ctx->base.pc_next += 1; - return cpu_ldsb_code(env, addr); + return (int8_t)translator_ldub(env, &ctx->base, addr); case 2: ctx->base.pc_next += 2; - return cpu_ldsw_code(env, addr); + return (int16_t)translator_lduw(env, &ctx->base, addr); case 3: ctx->base.pc_next += 3; - tmp = cpu_ldsb_code(env, addr + 2) << 16; - tmp |= cpu_lduw_code(env, addr) & 0xffff; + tmp = (int8_t)translator_ldub(env, &ctx->base, addr + 2); + tmp <<= 16; + tmp |= translator_lduw(env, &ctx->base, addr); return tmp; case 0: ctx->base.pc_next += 4; - return cpu_ldl_code(env, addr); + return translator_ldl(env, &ctx->base, addr); + default: + g_assert_not_reached(); } return 0; } @@ -190,22 +191,22 @@ static inline TCGv rx_index_addr(DisasContext *ctx, TCGv mem, { uint32_t dsp; - tcg_debug_assert(ld < 3); switch (ld) { case 0: return cpu_regs[reg]; case 1: - dsp = cpu_ldub_code(ctx->env, ctx->base.pc_next) << size; + dsp = translator_ldub(ctx->env, &ctx->base, ctx->base.pc_next) << size; tcg_gen_addi_i32(mem, cpu_regs[reg], dsp); ctx->base.pc_next += 1; return mem; case 2: - dsp = cpu_lduw_code(ctx->env, ctx->base.pc_next) << size; + dsp = translator_lduw(ctx->env, &ctx->base, ctx->base.pc_next) << size; tcg_gen_addi_i32(mem, cpu_regs[reg], dsp); ctx->base.pc_next += 2; return mem; + default: + g_assert_not_reached(); } - return NULL; } static inline MemOp mi_to_mop(unsigned mi) @@ -2247,20 +2248,12 @@ static void rx_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void rx_tr_disas_log(const DisasContextBase 
*dcbase, - CPUState *cs, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cs, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps rx_tr_ops = { .init_disas_context = rx_tr_init_disas_context, .tb_start = rx_tr_tb_start, .insn_start = rx_tr_insn_start, .translate_insn = rx_tr_translate_insn, .tb_stop = rx_tr_tb_stop, - .disas_log = rx_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/s390x/Kconfig b/target/s390x/Kconfig index 72da48136c..d886be48b4 100644 --- a/target/s390x/Kconfig +++ b/target/s390x/Kconfig @@ -1,2 +1,4 @@ config S390X bool + select PCI + select S390_FLIC diff --git a/target/s390x/cpu-param.h b/target/s390x/cpu-param.h index 84ca08626b..11d23b600d 100644 --- a/target/s390x/cpu-param.h +++ b/target/s390x/cpu-param.h @@ -13,4 +13,10 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 64 #define TARGET_VIRT_ADDR_SPACE_BITS 64 +/* + * The z/Architecture has a strong memory model with some + * store-after-load re-ordering. + */ +#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) + #endif diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index f7194534ae..2bbeaca36e 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -324,6 +324,42 @@ static void s390_cpu_reset_full(DeviceState *dev) #ifdef CONFIG_TCG #include "hw/core/tcg-cpu-ops.h" +void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *pflags) +{ + uint32_t flags; + + if (env->psw.addr & 1) { + /* + * Instructions must be at even addresses. + * This needs to be checked before address translation. + */ + env->int_pgm_ilen = 2; /* see s390_cpu_tlb_fill() */ + tcg_s390_program_interrupt(env, PGM_SPECIFICATION, 0); + } + + *pc = env->psw.addr; + *cs_base = env->ex_value; + + flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW; + if (env->psw.mask & PSW_MASK_PER) { + flags |= env->cregs[9] & (FLAG_MASK_PER_BRANCH | + FLAG_MASK_PER_IFETCH | + FLAG_MASK_PER_IFETCH_NULLIFY); + if ((env->cregs[9] & PER_CR9_EVENT_STORE) && + (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) { + flags |= FLAG_MASK_PER_STORE_REAL; + } + } + if (env->cregs[0] & CR0_AFP) { + flags |= FLAG_MASK_AFP; + } + if (env->cregs[0] & CR0_VECTOR) { + flags |= FLAG_MASK_VECTOR; + } + *pflags = flags; +} + static const TCGCPUOps s390_tcg_ops = { .initialize = s390x_translate_init, .restore_state_to_opc = s390x_restore_state_to_opc, diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h index 43a46a5a06..d6b75ad0e0 100644 --- a/target/s390x/cpu.h +++ b/target/s390x/cpu.h @@ -33,9 +33,6 @@ #define ELF_MACHINE_UNAME "S390X" -/* The z/Architecture has a strong memory model with some store-after-load re-ordering */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) - #define TARGET_HAS_PRECISE_SMC #define TARGET_INSN_START_EXTRA_WORDS 2 @@ -345,19 +342,32 @@ extern const VMStateDescription vmstate_s390_cpu; /* tb flags */ -#define FLAG_MASK_PSW_SHIFT 31 -#define FLAG_MASK_PER (PSW_MASK_PER >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_DAT (PSW_MASK_DAT >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_PSTATE (PSW_MASK_PSTATE >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_ASC (PSW_MASK_ASC >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_64 (PSW_MASK_64 >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_32 (PSW_MASK_32 >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_PSW (FLAG_MASK_PER | FLAG_MASK_DAT | FLAG_MASK_PSTATE \ - | FLAG_MASK_ASC | FLAG_MASK_64 | FLAG_MASK_32) - -/* we'll use some unused PSW positions to store 
CR flags in tb flags */ -#define FLAG_MASK_AFP (PSW_MASK_UNUSED_2 >> FLAG_MASK_PSW_SHIFT) -#define FLAG_MASK_VECTOR (PSW_MASK_UNUSED_3 >> FLAG_MASK_PSW_SHIFT) +#define FLAG_MASK_PSW_SHIFT 31 +#define FLAG_MASK_32 0x00000001u +#define FLAG_MASK_64 0x00000002u +#define FLAG_MASK_AFP 0x00000004u +#define FLAG_MASK_VECTOR 0x00000008u +#define FLAG_MASK_ASC 0x00018000u +#define FLAG_MASK_PSTATE 0x00020000u +#define FLAG_MASK_PER_IFETCH_NULLIFY 0x01000000u +#define FLAG_MASK_DAT 0x08000000u +#define FLAG_MASK_PER_STORE_REAL 0x20000000u +#define FLAG_MASK_PER_IFETCH 0x40000000u +#define FLAG_MASK_PER_BRANCH 0x80000000u + +QEMU_BUILD_BUG_ON(FLAG_MASK_32 != PSW_MASK_32 >> FLAG_MASK_PSW_SHIFT); +QEMU_BUILD_BUG_ON(FLAG_MASK_64 != PSW_MASK_64 >> FLAG_MASK_PSW_SHIFT); +QEMU_BUILD_BUG_ON(FLAG_MASK_ASC != PSW_MASK_ASC >> FLAG_MASK_PSW_SHIFT); +QEMU_BUILD_BUG_ON(FLAG_MASK_PSTATE != PSW_MASK_PSTATE >> FLAG_MASK_PSW_SHIFT); +QEMU_BUILD_BUG_ON(FLAG_MASK_DAT != PSW_MASK_DAT >> FLAG_MASK_PSW_SHIFT); + +#define FLAG_MASK_PSW (FLAG_MASK_DAT | FLAG_MASK_PSTATE | \ + FLAG_MASK_ASC | FLAG_MASK_64 | FLAG_MASK_32) +#define FLAG_MASK_CR9 (FLAG_MASK_PER_BRANCH | FLAG_MASK_PER_IFETCH) +#define FLAG_MASK_PER (FLAG_MASK_PER_BRANCH | \ + FLAG_MASK_PER_IFETCH | \ + FLAG_MASK_PER_IFETCH_NULLIFY | \ + FLAG_MASK_PER_STORE_REAL) /* Control register 0 bits */ #define CR0_LOWPROT 0x0000000010000000ULL @@ -416,38 +426,28 @@ static inline int s390x_env_mmu_index(CPUS390XState *env, bool ifetch) #include "tcg/tcg_s390x.h" -static inline void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, - uint64_t *cs_base, uint32_t *flags) -{ - if (env->psw.addr & 1) { - /* - * Instructions must be at even addresses. - * This needs to be checked before address translation. - */ - env->int_pgm_ilen = 2; /* see s390_cpu_tlb_fill() */ - tcg_s390_program_interrupt(env, PGM_SPECIFICATION, 0); - } - *pc = env->psw.addr; - *cs_base = env->ex_value; - *flags = (env->psw.mask >> FLAG_MASK_PSW_SHIFT) & FLAG_MASK_PSW; - if (env->cregs[0] & CR0_AFP) { - *flags |= FLAG_MASK_AFP; - } - if (env->cregs[0] & CR0_VECTOR) { - *flags |= FLAG_MASK_VECTOR; - } -} +void cpu_get_tb_cpu_state(CPUS390XState *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags); #endif /* CONFIG_TCG */ /* PER bits from control register 9 */ -#define PER_CR9_EVENT_BRANCH 0x80000000 -#define PER_CR9_EVENT_IFETCH 0x40000000 -#define PER_CR9_EVENT_STORE 0x20000000 -#define PER_CR9_EVENT_STORE_REAL 0x08000000 -#define PER_CR9_EVENT_NULLIFICATION 0x01000000 -#define PER_CR9_CONTROL_BRANCH_ADDRESS 0x00800000 -#define PER_CR9_CONTROL_ALTERATION 0x00200000 +#define PER_CR9_EVENT_BRANCH 0x80000000 +#define PER_CR9_EVENT_IFETCH 0x40000000 +#define PER_CR9_EVENT_STORE 0x20000000 +#define PER_CR9_EVENT_STORAGE_KEY_ALTERATION 0x10000000 +#define PER_CR9_EVENT_STORE_REAL 0x08000000 +#define PER_CR9_EVENT_ZERO_ADDRESS_DETECTION 0x04000000 +#define PER_CR9_EVENT_TRANSACTION_END 0x02000000 +#define PER_CR9_EVENT_IFETCH_NULLIFICATION 0x01000000 +#define PER_CR9_CONTROL_BRANCH_ADDRESS 0x00800000 +#define PER_CR9_CONTROL_TRANSACTION_SUPRESS 0x00400000 +#define PER_CR9_CONTROL_STORAGE_ALTERATION 0x00200000 + +QEMU_BUILD_BUG_ON(FLAG_MASK_PER_BRANCH != PER_CR9_EVENT_BRANCH); +QEMU_BUILD_BUG_ON(FLAG_MASK_PER_IFETCH != PER_CR9_EVENT_IFETCH); +QEMU_BUILD_BUG_ON(FLAG_MASK_PER_IFETCH_NULLIFY != + PER_CR9_EVENT_IFETCH_NULLIFICATION); /* PER bits from the PER CODE/ATMID/AI in lowcore */ #define PER_CODE_EVENT_BRANCH 0x8000 diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c index 
d28eb65845..cb4e2b8920 100644 --- a/target/s390x/cpu_features.c +++ b/target/s390x/cpu_features.c @@ -212,6 +212,23 @@ void s390_feat_bitmap_to_ascii(const S390FeatBitmap features, void *opaque, }; } +void s390_get_deprecated_features(S390FeatBitmap features) +{ + static const int feats[] = { + /* CSSKE is deprecated on newer generations */ + S390_FEAT_CONDITIONAL_SSKE, + S390_FEAT_BPB, + /* Deprecated on z16 */ + S390_FEAT_CONSTRAINT_TRANSACTIONAL_EXE, + S390_FEAT_TRANSACTIONAL_EXE + }; + int i; + + for (i = 0; i < ARRAY_SIZE(feats); i++) { + set_bit(feats[i], features); + } +} + #define FEAT_GROUP_INIT(_name, _group, _desc) \ { \ .name = _name, \ diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h index a9bd68a2e1..661a8cd6db 100644 --- a/target/s390x/cpu_features.h +++ b/target/s390x/cpu_features.h @@ -69,6 +69,7 @@ void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type, uint8_t *data); void s390_feat_bitmap_to_ascii(const S390FeatBitmap features, void *opaque, void (*fn)(const char *name, void *opaque)); +void s390_get_deprecated_features(S390FeatBitmap features); /* Definition of a CPU feature group */ typedef struct { diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c index 8ed3bb6a27..efb508cd2e 100644 --- a/target/s390x/cpu_models.c +++ b/target/s390x/cpu_models.c @@ -355,9 +355,9 @@ static void s390_print_cpu_model_list_entry(gpointer data, gpointer user_data) /* strip off the -s390x-cpu */ g_strrstr(name, "-" TYPE_S390_CPU)[0] = 0; if (details->len) { - qemu_printf("s390 %-15s %-35s (%s)\n", name, scc->desc, details->str); + qemu_printf(" %-15s %-35s (%s)\n", name, scc->desc, details->str); } else { - qemu_printf("s390 %-15s %-35s\n", name, scc->desc); + qemu_printf(" %-15s %-35s\n", name, scc->desc); } g_free(name); } @@ -402,6 +402,7 @@ void s390_cpu_list(void) S390Feat feat; GSList *list; + qemu_printf("Available CPUs:\n"); list = object_class_get_list(TYPE_S390_CPU, false); list = g_slist_sort(list, s390_cpu_list_compare); g_slist_foreach(list, s390_print_cpu_model_list_entry, NULL); @@ -411,14 +412,14 @@ void s390_cpu_list(void) for (feat = 0; feat < S390_FEAT_MAX; feat++) { const S390FeatDef *def = s390_feat_def(feat); - qemu_printf("%-20s %s\n", def->name, def->desc); + qemu_printf(" %-20s %s\n", def->name, def->desc); } qemu_printf("\nRecognized feature groups:\n"); for (group = 0; group < S390_FEAT_GROUP_MAX; group++) { const S390FeatGroupDef *def = s390_feat_group_def(group); - qemu_printf("%-20s %s\n", def->name, def->desc); + qemu_printf(" %-20s %s\n", def->name, def->desc); } } @@ -510,7 +511,7 @@ static void check_compat_model_failed(Error **errp, return; } -static void check_compatibility(const S390CPUModel *max_model, +static bool check_compatibility(const S390CPUModel *max_model, const S390CPUModel *model, Error **errp) { ERRP_GUARD(); @@ -518,11 +519,11 @@ static void check_compatibility(const S390CPUModel *max_model, if (model->def->gen > max_model->def->gen) { check_compat_model_failed(errp, max_model, "Selected CPU generation is too new"); - return; + return false; } else if (model->def->gen == max_model->def->gen && model->def->ec_ga > max_model->def->ec_ga) { check_compat_model_failed(errp, max_model, "Selected CPU GA level is too new"); - return; + return false; } #ifndef CONFIG_USER_ONLY @@ -530,14 +531,14 @@ static void check_compatibility(const S390CPUModel *max_model, error_setg(errp, "The unpack facility is not compatible with " "the --only-migratable option. 
You must remove either " "the 'unpack' facility or the --only-migratable option"); - return; + return false; } #endif /* detect the missing features to properly report them */ bitmap_andnot(missing, model->features, max_model->features, S390_FEAT_MAX); if (bitmap_empty(missing, S390_FEAT_MAX)) { - return; + return true; } error_setg(errp, " "); @@ -546,11 +547,11 @@ static void check_compatibility(const S390CPUModel *max_model, "available in the current configuration: "); error_append_hint(errp, "Consider a different accelerator, QEMU, or kernel version\n"); + return false; } S390CPUModel *get_max_cpu_model(Error **errp) { - Error *err = NULL; static S390CPUModel max_model; static bool cached; @@ -559,16 +560,14 @@ S390CPUModel *get_max_cpu_model(Error **errp) } if (kvm_enabled()) { - kvm_s390_get_host_cpu_model(&max_model, &err); + if (!kvm_s390_get_host_cpu_model(&max_model, errp)) { + return NULL; + } } else { max_model.def = s390_find_cpu_def(QEMU_MAX_CPU_TYPE, QEMU_MAX_CPU_GEN, QEMU_MAX_CPU_EC_GA, NULL); bitmap_copy(max_model.features, qemu_max_cpu_feat, S390_FEAT_MAX); } - if (err) { - error_propagate(errp, err); - return NULL; - } cached = true; return &max_model; } @@ -576,7 +575,6 @@ S390CPUModel *get_max_cpu_model(Error **errp) void s390_realize_cpu_model(CPUState *cs, Error **errp) { ERRP_GUARD(); - Error *err = NULL; S390CPUClass *xcc = S390_CPU_GET_CLASS(cs); S390CPU *cpu = S390_CPU(cs); const S390CPUModel *max_model; @@ -605,9 +603,7 @@ void s390_realize_cpu_model(CPUState *cs, Error **errp) cpu->model->cpu_ver = max_model->cpu_ver; check_consistency(cpu->model); - check_compatibility(max_model, cpu->model, &err); - if (err) { - error_propagate(errp, err); + if (!check_compatibility(max_model, cpu->model, errp)) { return; } diff --git a/target/s390x/cpu_models.h b/target/s390x/cpu_models.h index d7b8912989..71d4bc2dd4 100644 --- a/target/s390x/cpu_models.h +++ b/target/s390x/cpu_models.h @@ -114,23 +114,8 @@ static inline uint64_t s390_cpuid_from_cpu_model(const S390CPUModel *model) S390CPUDef const *s390_find_cpu_def(uint16_t type, uint8_t gen, uint8_t ec_ga, S390FeatBitmap features); -#ifdef CONFIG_KVM bool kvm_s390_cpu_models_supported(void); -void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp); -void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp); -#else -static inline void kvm_s390_get_host_cpu_model(S390CPUModel *model, - Error **errp) -{ -} -static inline void kvm_s390_apply_cpu_model(const S390CPUModel *model, - Error **errp) -{ -} -static inline bool kvm_s390_cpu_models_supported(void) -{ - return false; -} -#endif +bool kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp); +bool kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp); #endif /* TARGET_S390X_CPU_MODELS_H */ diff --git a/target/s390x/cpu_models_sysemu.c b/target/s390x/cpu_models_sysemu.c index 2d99218069..977fbc6522 100644 --- a/target/s390x/cpu_models_sysemu.c +++ b/target/s390x/cpu_models_sysemu.c @@ -206,6 +206,14 @@ static void cpu_info_from_model(CpuModelInfo *info, const S390CPUModel *model, } else { info->props = QOBJECT(qdict); } + + /* features flagged as deprecated */ + bitmap_zero(bitmap, S390_FEAT_MAX); + s390_get_deprecated_features(bitmap); + + bitmap_and(bitmap, bitmap, model->def->full_feat, S390_FEAT_MAX); + s390_feat_bitmap_to_ascii(bitmap, &info->deprecated_props, list_add_feat); + info->has_deprecated_props = !!info->deprecated_props; } CpuModelExpansionInfo *qmp_query_cpu_model_expansion(CpuModelExpansionType type, @@ -389,7 
+397,6 @@ CpuModelBaselineInfo *qmp_query_cpu_model_baseline(CpuModelInfo *infoa, void apply_cpu_model(const S390CPUModel *model, Error **errp) { - Error *err = NULL; static S390CPUModel applied_model; static bool applied; @@ -405,9 +412,7 @@ void apply_cpu_model(const S390CPUModel *model, Error **errp) } if (kvm_enabled()) { - kvm_s390_apply_cpu_model(model, &err); - if (err) { - error_propagate(errp, err); + if (!kvm_s390_apply_cpu_model(model, errp)) { return; } } diff --git a/target/s390x/helper.h b/target/s390x/helper.h index cc1c20e9e3..1a8a76abb9 100644 --- a/target/s390x/helper.h +++ b/target/s390x/helper.h @@ -359,10 +359,10 @@ DEF_HELPER_FLAGS_4(ipte, TCG_CALL_NO_RWG, void, env, i64, i64, i32) DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env) DEF_HELPER_FLAGS_1(purge, TCG_CALL_NO_RWG, void, env) DEF_HELPER_3(lra, i64, env, i64, i64) -DEF_HELPER_1(per_check_exception, void, env) -DEF_HELPER_FLAGS_3(per_branch, TCG_CALL_NO_RWG, void, env, i64, i64) -DEF_HELPER_FLAGS_2(per_ifetch, TCG_CALL_NO_RWG, void, env, i64) -DEF_HELPER_FLAGS_1(per_store_real, TCG_CALL_NO_RWG, void, env) +DEF_HELPER_FLAGS_1(per_check_exception, TCG_CALL_NO_WG, void, env) +DEF_HELPER_FLAGS_3(per_branch, TCG_CALL_NO_WG, void, env, i64, i32) +DEF_HELPER_FLAGS_2(per_ifetch, TCG_CALL_NO_WG, void, env, i32) +DEF_HELPER_FLAGS_2(per_store_real, TCG_CALL_NO_WG, noreturn, env, i32) DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env) DEF_HELPER_2(xsch, void, env, i64) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c index 4dcd757cdc..1b494ecc20 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -2375,7 +2375,7 @@ bool kvm_s390_cpu_models_supported(void) KVM_S390_VM_CPU_MACHINE_SUBFUNC); } -void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) +bool kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) { struct kvm_s390_vm_cpu_machine prop = {}; struct kvm_device_attr attr = { @@ -2390,14 +2390,14 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) if (!kvm_s390_cpu_models_supported()) { error_setg(errp, "KVM doesn't support CPU models"); - return; + return false; } /* query the basic cpu model properties */ rc = kvm_vm_ioctl(kvm_state, KVM_GET_DEVICE_ATTR, &attr); if (rc) { error_setg(errp, "KVM: Error querying host CPU model: %d", rc); - return; + return false; } cpu_type = cpuid_type(prop.cpuid); @@ -2420,13 +2420,13 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) rc = query_cpu_feat(model->features); if (rc) { error_setg(errp, "KVM: Error querying CPU features: %d", rc); - return; + return false; } /* get supported cpu subfunctions indicated via query / test bit */ rc = query_cpu_subfunc(model->features); if (rc) { error_setg(errp, "KVM: Error querying CPU subfunctions: %d", rc); - return; + return false; } /* PTFF subfunctions might be indicated although kernel support missing */ @@ -2482,7 +2482,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) } if (!model->def) { error_setg(errp, "KVM: host CPU model could not be identified"); - return; + return false; } /* for now, we can only provide the AP feature with HW support */ if (ap_available()) { @@ -2506,6 +2506,7 @@ void kvm_s390_get_host_cpu_model(S390CPUModel *model, Error **errp) /* strip of features that are not part of the maximum model */ bitmap_and(model->features, model->features, model->def->full_feat, S390_FEAT_MAX); + return true; } static int configure_uv_feat_guest(const S390FeatBitmap features) @@ -2542,7 +2543,7 @@ static void 
kvm_s390_configure_apie(bool interpret) } } -void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) +bool kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) { struct kvm_s390_vm_cpu_processor prop = { .fac_list = { 0 }, @@ -2559,11 +2560,11 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) if (kvm_s390_cmma_available()) { kvm_s390_enable_cmma(); } - return; + return true; } if (!kvm_s390_cpu_models_supported()) { error_setg(errp, "KVM doesn't support CPU models"); - return; + return false; } prop.cpuid = s390_cpuid_from_cpu_model(model); prop.ibc = s390_ibc_from_cpu_model(model); @@ -2573,19 +2574,19 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) rc = kvm_vm_ioctl(kvm_state, KVM_SET_DEVICE_ATTR, &attr); if (rc) { error_setg(errp, "KVM: Error configuring the CPU model: %d", rc); - return; + return false; } /* configure cpu features indicated e.g. via SCLP */ rc = configure_cpu_feat(model->features); if (rc) { error_setg(errp, "KVM: Error configuring CPU features: %d", rc); - return; + return false; } /* configure cpu subfunctions indicated via query / test bit */ rc = configure_cpu_subfunc(model->features); if (rc) { error_setg(errp, "KVM: Error configuring CPU subfunctions: %d", rc); - return; + return false; } /* enable CMM via CMMA */ if (test_bit(S390_FEAT_CMM, model->features)) { @@ -2600,8 +2601,9 @@ void kvm_s390_apply_cpu_model(const S390CPUModel *model, Error **errp) rc = configure_uv_feat_guest(model->features); if (rc) { error_setg(errp, "KVM: Error configuring CPU UV features %d", rc); - return; + return false; } + return true; } void kvm_s390_restart_interrupt(S390CPU *cpu) diff --git a/target/s390x/mmu_helper.c b/target/s390x/mmu_helper.c index fbb2f1b4d4..f3a2f25a5c 100644 --- a/target/s390x/mmu_helper.c +++ b/target/s390x/mmu_helper.c @@ -24,6 +24,7 @@ #include "sysemu/kvm.h" #include "sysemu/tcg.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "trace.h" #include "hw/hw.h" #include "hw/s390x/storage-keys.h" diff --git a/target/s390x/sigp.c b/target/s390x/sigp.c index 9dd977349a..ad0ad61177 100644 --- a/target/s390x/sigp.c +++ b/target/s390x/sigp.c @@ -11,6 +11,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "s390x-internal.h" +#include "hw/boards.h" #include "sysemu/hw_accel.h" #include "sysemu/runstate.h" #include "exec/address-spaces.h" @@ -435,6 +436,22 @@ static int sigp_set_architecture(S390CPU *cpu, uint32_t param, return SIGP_CC_STATUS_STORED; } +S390CPU *s390_cpu_addr2state(uint16_t cpu_addr) +{ + static MachineState *ms; + + if (!ms) { + ms = MACHINE(qdev_get_machine()); + g_assert(ms->possible_cpus); + } + + /* CPU address corresponds to the core_id and the index */ + if (cpu_addr >= ms->possible_cpus->len) { + return NULL; + } + return S390_CPU(ms->possible_cpus->cpus[cpu_addr].cpu); +} + int handle_sigp(CPUS390XState *env, uint8_t order, uint64_t r1, uint64_t r3) { uint64_t *status_reg = &env->regs[r1]; diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c index f1c33f7967..4c0b692c9e 100644 --- a/target/s390x/tcg/excp_helper.c +++ b/target/s390x/tcg/excp_helper.c @@ -209,7 +209,7 @@ static void do_program_interrupt(CPUS390XState *env) switch (env->int_pgm_code) { case PGM_PER: - advance = !(env->per_perc_atmid & PER_CODE_EVENT_NULLIFICATION); + /* advance already handled */ break; case PGM_ASCE_TYPE: case PGM_REG_FIRST_TRANS: diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c index 557831def4..6a308c5553 
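Several hunks above convert functions that took only an Error ** and returned void into bool-returning ones (check_compatibility, kvm_s390_get_host_cpu_model, kvm_s390_apply_cpu_model), so callers can test the result directly instead of keeping a local Error * and calling error_propagate(). A rough sketch of that calling convention, with hypothetical function names; Error and error_setg() are QEMU's existing error API as used in the hunks above:

    #include "qapi/error.h"

    /* Hypothetical helper: sets *errp and returns false on failure. */
    static bool frobnicate(int value, Error **errp)
    {
        if (value < 0) {
            error_setg(errp, "value must be non-negative, got %d", value);
            return false;
        }
        return true;
    }

    /* Caller simply forwards errp; no local Error * or error_propagate(). */
    static bool configure_device(int value, Error **errp)
    {
        return frobnicate(value, errp);
    }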
100644 --- a/target/s390x/tcg/mem_helper.c +++ b/target/s390x/tcg/mem_helper.c @@ -25,6 +25,7 @@ #include "tcg_s390x.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/cpu_ldst.h" #include "hw/core/tcg-cpu-ops.h" #include "qemu/int128.h" diff --git a/target/s390x/tcg/misc_helper.c b/target/s390x/tcg/misc_helper.c index 8764846ce8..303f86d363 100644 --- a/target/s390x/tcg/misc_helper.c +++ b/target/s390x/tcg/misc_helper.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/cutils.h" +#include "qemu/log.h" #include "cpu.h" #include "s390x-internal.h" #include "qemu/host-utils.h" @@ -590,10 +591,24 @@ void HELPER(chsc)(CPUS390XState *env, uint64_t inst) #endif #ifndef CONFIG_USER_ONLY +static G_NORETURN void per_raise_exception(CPUS390XState *env) +{ + trigger_pgm_exception(env, PGM_PER); + cpu_loop_exit(env_cpu(env)); +} + +static G_NORETURN void per_raise_exception_log(CPUS390XState *env) +{ + qemu_log_mask(CPU_LOG_INT, "PER interrupt after 0x%" PRIx64 "\n", + env->per_address); + per_raise_exception(env); +} + void HELPER(per_check_exception)(CPUS390XState *env) { - if (env->per_perc_atmid) { - tcg_s390_program_interrupt(env, PGM_PER, GETPC()); + /* psw_addr, per_address and int_pgm_ilen are already set. */ + if (unlikely(env->per_perc_atmid)) { + per_raise_exception_log(env); } } @@ -608,46 +623,45 @@ static inline bool get_per_in_range(CPUS390XState *env, uint64_t addr) } } -void HELPER(per_branch)(CPUS390XState *env, uint64_t from, uint64_t to) +void HELPER(per_branch)(CPUS390XState *env, uint64_t dest, uint32_t ilen) { - if ((env->cregs[9] & PER_CR9_EVENT_BRANCH)) { - if (!(env->cregs[9] & PER_CR9_CONTROL_BRANCH_ADDRESS) - || get_per_in_range(env, to)) { - env->per_address = from; - env->per_perc_atmid = PER_CODE_EVENT_BRANCH | get_per_atmid(env); - } + if ((env->cregs[9] & PER_CR9_CONTROL_BRANCH_ADDRESS) + && !get_per_in_range(env, dest)) { + return; } + + env->psw.addr = dest; + env->int_pgm_ilen = ilen; + env->per_address = env->gbea; + env->per_perc_atmid = PER_CODE_EVENT_BRANCH | get_per_atmid(env); + per_raise_exception_log(env); } -void HELPER(per_ifetch)(CPUS390XState *env, uint64_t addr) +void HELPER(per_ifetch)(CPUS390XState *env, uint32_t ilen) { - if ((env->cregs[9] & PER_CR9_EVENT_IFETCH) && get_per_in_range(env, addr)) { - env->per_address = addr; + if (get_per_in_range(env, env->psw.addr)) { + env->per_address = env->psw.addr; + env->int_pgm_ilen = ilen; env->per_perc_atmid = PER_CODE_EVENT_IFETCH | get_per_atmid(env); /* If the instruction has to be nullified, trigger the exception immediately. */ - if (env->cregs[9] & PER_CR9_EVENT_NULLIFICATION) { - CPUState *cs = env_cpu(env); - + if (env->cregs[9] & PER_CR9_EVENT_IFETCH_NULLIFICATION) { env->per_perc_atmid |= PER_CODE_EVENT_NULLIFICATION; - env->int_pgm_code = PGM_PER; - env->int_pgm_ilen = get_ilen(cpu_ldub_code(env, addr)); - - cs->exception_index = EXCP_PGM; - cpu_loop_exit(cs); + qemu_log_mask(CPU_LOG_INT, "PER interrupt before 0x%" PRIx64 "\n", + env->per_address); + per_raise_exception(env); } } } -void HELPER(per_store_real)(CPUS390XState *env) +void HELPER(per_store_real)(CPUS390XState *env, uint32_t ilen) { - if ((env->cregs[9] & PER_CR9_EVENT_STORE) && - (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) { - /* PSW is saved just before calling the helper. */ - env->per_address = env->psw.addr; - env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env); - } + /* PSW is saved just before calling the helper. 
*/ + env->per_address = env->psw.addr; + env->int_pgm_ilen = ilen; + env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env); + per_raise_exception_log(env); } #endif diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c index 90a74ee795..c81e035dea 100644 --- a/target/s390x/tcg/translate.c +++ b/target/s390x/tcg/translate.c @@ -31,13 +31,11 @@ #include "qemu/osdep.h" #include "cpu.h" #include "s390x-internal.h" -#include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "tcg/tcg-op-gvec.h" #include "qemu/log.h" #include "qemu/host-utils.h" -#include "exec/cpu_ldst.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" @@ -343,33 +341,11 @@ static void update_psw_addr(DisasContext *s) tcg_gen_movi_i64(psw_addr, s->base.pc_next); } -static void per_branch(DisasContext *s, bool to_next) +static void per_branch(DisasContext *s, TCGv_i64 dest) { #ifndef CONFIG_USER_ONLY - tcg_gen_movi_i64(gbea, s->base.pc_next); - - if (s->base.tb->flags & FLAG_MASK_PER) { - TCGv_i64 next_pc = to_next ? tcg_constant_i64(s->pc_tmp) : psw_addr; - gen_helper_per_branch(tcg_env, gbea, next_pc); - } -#endif -} - -static void per_branch_cond(DisasContext *s, TCGCond cond, - TCGv_i64 arg1, TCGv_i64 arg2) -{ -#ifndef CONFIG_USER_ONLY - if (s->base.tb->flags & FLAG_MASK_PER) { - TCGLabel *lab = gen_new_label(); - tcg_gen_brcond_i64(tcg_invert_cond(cond), arg1, arg2, lab); - - tcg_gen_movi_i64(gbea, s->base.pc_next); - gen_helper_per_branch(tcg_env, gbea, psw_addr); - - gen_set_label(lab); - } else { - TCGv_i64 pc = tcg_constant_i64(s->base.pc_next); - tcg_gen_movcond_i64(cond, gbea, arg1, arg2, gbea, pc); + if (s->base.tb->flags & FLAG_MASK_PER_BRANCH) { + gen_helper_per_branch(tcg_env, dest, tcg_constant_i32(s->ilen)); } #endif } @@ -658,9 +634,6 @@ static void gen_op_calc_cc(DisasContext *s) static bool use_goto_tb(DisasContext *s, uint64_t dest) { - if (unlikely(s->base.tb->flags & FLAG_MASK_PER)) { - return false; - } return translator_use_goto_tb(&s->base, dest); } @@ -1102,144 +1075,105 @@ struct DisasInsn { static DisasJumpType help_goto_direct(DisasContext *s, uint64_t dest) { + update_cc_op(s); + per_breaking_event(s); + per_branch(s, tcg_constant_i64(dest)); + if (dest == s->pc_tmp) { - per_branch(s, true); return DISAS_NEXT; } if (use_goto_tb(s, dest)) { - update_cc_op(s); - per_breaking_event(s); tcg_gen_goto_tb(0); tcg_gen_movi_i64(psw_addr, dest); tcg_gen_exit_tb(s->base.tb, 0); return DISAS_NORETURN; } else { tcg_gen_movi_i64(psw_addr, dest); - per_branch(s, false); - return DISAS_PC_UPDATED; + return DISAS_PC_CC_UPDATED; } } +static DisasJumpType help_goto_indirect(DisasContext *s, TCGv_i64 dest) +{ + update_cc_op(s); + per_breaking_event(s); + tcg_gen_mov_i64(psw_addr, dest); + per_branch(s, psw_addr); + return DISAS_PC_CC_UPDATED; +} + static DisasJumpType help_branch(DisasContext *s, DisasCompare *c, bool is_imm, int imm, TCGv_i64 cdest) { - DisasJumpType ret; uint64_t dest = s->base.pc_next + (int64_t)imm * 2; TCGLabel *lab; /* Take care of the special cases first. */ if (c->cond == TCG_COND_NEVER) { - ret = DISAS_NEXT; - goto egress; + return DISAS_NEXT; } if (is_imm) { - if (dest == s->pc_tmp) { - /* Branch to next. */ - per_branch(s, true); - ret = DISAS_NEXT; - goto egress; - } - if (c->cond == TCG_COND_ALWAYS) { - ret = help_goto_direct(s, dest); - goto egress; + /* + * Do not optimize a conditional branch if PER enabled, because we + * still need a conditional call to helper_per_branch. 
+ */ + if (c->cond == TCG_COND_ALWAYS + || (dest == s->pc_tmp && + !(s->base.tb->flags & FLAG_MASK_PER_BRANCH))) { + return help_goto_direct(s, dest); } } else { if (!cdest) { /* E.g. bcr %r0 -> no branch. */ - ret = DISAS_NEXT; - goto egress; + return DISAS_NEXT; } if (c->cond == TCG_COND_ALWAYS) { - tcg_gen_mov_i64(psw_addr, cdest); - per_branch(s, false); - ret = DISAS_PC_UPDATED; - goto egress; + return help_goto_indirect(s, cdest); } } - if (use_goto_tb(s, s->pc_tmp)) { - if (is_imm && use_goto_tb(s, dest)) { - /* Both exits can use goto_tb. */ - update_cc_op(s); - - lab = gen_new_label(); - if (c->is_64) { - tcg_gen_brcond_i64(c->cond, c->u.s64.a, c->u.s64.b, lab); - } else { - tcg_gen_brcond_i32(c->cond, c->u.s32.a, c->u.s32.b, lab); - } - - /* Branch not taken. */ - tcg_gen_goto_tb(0); - tcg_gen_movi_i64(psw_addr, s->pc_tmp); - tcg_gen_exit_tb(s->base.tb, 0); - - /* Branch taken. */ - gen_set_label(lab); - per_breaking_event(s); - tcg_gen_goto_tb(1); - tcg_gen_movi_i64(psw_addr, dest); - tcg_gen_exit_tb(s->base.tb, 1); - - ret = DISAS_NORETURN; - } else { - /* Fallthru can use goto_tb, but taken branch cannot. */ - /* Store taken branch destination before the brcond. This - avoids having to allocate a new local temp to hold it. - We'll overwrite this in the not taken case anyway. */ - if (!is_imm) { - tcg_gen_mov_i64(psw_addr, cdest); - } - - lab = gen_new_label(); - if (c->is_64) { - tcg_gen_brcond_i64(c->cond, c->u.s64.a, c->u.s64.b, lab); - } else { - tcg_gen_brcond_i32(c->cond, c->u.s32.a, c->u.s32.b, lab); - } + update_cc_op(s); - /* Branch not taken. */ - update_cc_op(s); - tcg_gen_goto_tb(0); - tcg_gen_movi_i64(psw_addr, s->pc_tmp); - tcg_gen_exit_tb(s->base.tb, 0); + /* + * Ensure the taken branch is fall-through of the tcg branch. + * This keeps @cdest usage within the extended basic block, + * which avoids an otherwise unnecessary spill to the stack. + */ + lab = gen_new_label(); + if (c->is_64) { + tcg_gen_brcond_i64(tcg_invert_cond(c->cond), + c->u.s64.a, c->u.s64.b, lab); + } else { + tcg_gen_brcond_i32(tcg_invert_cond(c->cond), + c->u.s32.a, c->u.s32.b, lab); + } - gen_set_label(lab); - if (is_imm) { - tcg_gen_movi_i64(psw_addr, dest); - } - per_breaking_event(s); - ret = DISAS_PC_UPDATED; - } + /* Branch taken. */ + per_breaking_event(s); + if (is_imm) { + tcg_gen_movi_i64(psw_addr, dest); } else { - /* Fallthru cannot use goto_tb. This by itself is vanishingly rare. - Most commonly we're single-stepping or some other condition that - disables all use of goto_tb. Just update the PC and exit. */ + tcg_gen_mov_i64(psw_addr, cdest); + } + per_branch(s, psw_addr); - TCGv_i64 next = tcg_constant_i64(s->pc_tmp); - if (is_imm) { - cdest = tcg_constant_i64(dest); - } + if (is_imm && use_goto_tb(s, dest)) { + tcg_gen_goto_tb(0); + tcg_gen_exit_tb(s->base.tb, 0); + } else { + tcg_gen_lookup_and_goto_ptr(); + } - if (c->is_64) { - tcg_gen_movcond_i64(c->cond, psw_addr, c->u.s64.a, c->u.s64.b, - cdest, next); - per_branch_cond(s, c->cond, c->u.s64.a, c->u.s64.b); - } else { - TCGv_i32 t0 = tcg_temp_new_i32(); - TCGv_i64 t1 = tcg_temp_new_i64(); - TCGv_i64 z = tcg_constant_i64(0); - tcg_gen_setcond_i32(c->cond, t0, c->u.s32.a, c->u.s32.b); - tcg_gen_extu_i32_i64(t1, t0); - tcg_gen_movcond_i64(TCG_COND_NE, psw_addr, t1, z, cdest, next); - per_branch_cond(s, TCG_COND_NE, t1, z); - } + gen_set_label(lab); - ret = DISAS_PC_UPDATED; + /* Branch not taken. 
*/ + tcg_gen_movi_i64(psw_addr, s->pc_tmp); + if (use_goto_tb(s, s->pc_tmp)) { + tcg_gen_goto_tb(1); + tcg_gen_exit_tb(s->base.tb, 1); + return DISAS_NORETURN; } - - egress: - return ret; + return DISAS_PC_CC_UPDATED; } /* ====================================================================== */ @@ -1465,9 +1399,7 @@ static DisasJumpType op_bas(DisasContext *s, DisasOps *o) { pc_to_link_info(o->out, s, s->pc_tmp); if (o->in2) { - tcg_gen_mov_i64(psw_addr, o->in2); - per_branch(s, false); - return DISAS_PC_UPDATED; + return help_goto_indirect(s, o->in2); } else { return DISAS_NEXT; } @@ -1497,9 +1429,7 @@ static DisasJumpType op_bal(DisasContext *s, DisasOps *o) { save_link_info(s, o); if (o->in2) { - tcg_gen_mov_i64(psw_addr, o->in2); - per_branch(s, false); - return DISAS_PC_UPDATED; + return help_goto_indirect(s, o->in2); } else { return DISAS_NEXT; } @@ -4411,9 +4341,11 @@ static DisasJumpType op_stura(DisasContext *s, DisasOps *o) { tcg_gen_qemu_st_tl(o->in1, o->in2, MMU_REAL_IDX, s->insn->data); - if (s->base.tb->flags & FLAG_MASK_PER) { + if (s->base.tb->flags & FLAG_MASK_PER_STORE_REAL) { + update_cc_op(s); update_psw_addr(s); - gen_helper_per_store_real(tcg_env); + gen_helper_per_store_real(tcg_env, tcg_constant_i32(s->ilen)); + return DISAS_NORETURN; } return DISAS_NEXT; } @@ -6192,6 +6124,8 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s) const DisasInsn *info; if (unlikely(s->ex_value)) { + uint64_t be_insn; + /* Drop the EX data now, so that it's clear on exception paths. */ tcg_gen_st_i64(tcg_constant_i64(0), tcg_env, offsetof(CPUS390XState, ex_value)); @@ -6199,13 +6133,11 @@ static const DisasInsn *extract_insn(CPUS390XState *env, DisasContext *s) /* Extract the values saved by EXECUTE. */ insn = s->ex_value & 0xffffffffffff0000ull; ilen = s->ex_value & 0xf; + op = insn >> 56; /* Register insn bytes with translator so plugins work. */ - for (int i = 0; i < ilen; i++) { - uint8_t byte = extract64(insn, 56 - (i * 8), 8); - translator_fake_ldb(byte, pc + i); - } - op = insn >> 56; + be_insn = cpu_to_be64(insn); + translator_fake_ld(&s->base, &be_insn, get_ilen(op)); } else { insn = ld_code2(env, s, pc); op = (insn >> 8) & 0xff; @@ -6325,9 +6257,9 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) } #ifndef CONFIG_USER_ONLY - if (s->base.tb->flags & FLAG_MASK_PER) { - TCGv_i64 addr = tcg_constant_i64(s->base.pc_next); - gen_helper_per_ifetch(tcg_env, addr); + if (s->base.tb->flags & FLAG_MASK_PER_IFETCH) { + /* With ifetch set, psw_addr and cc_op are always up-to-date. */ + gen_helper_per_ifetch(tcg_env, tcg_constant_i32(s->ilen)); } #endif @@ -6409,15 +6341,16 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) } if (insn->help_op) { ret = insn->help_op(s, &o); - } - if (ret != DISAS_NORETURN) { - if (insn->help_wout) { - insn->help_wout(s, &o); - } - if (insn->help_cout) { - insn->help_cout(s, &o); + if (ret == DISAS_NORETURN) { + goto out; } } + if (insn->help_wout) { + insn->help_wout(s, &o); + } + if (insn->help_cout) { + insn->help_cout(s, &o); + } /* io should be the last instruction in tb when icount is enabled */ if (unlikely(icount && ret == DISAS_NEXT)) { @@ -6425,13 +6358,18 @@ static DisasJumpType translate_one(CPUS390XState *env, DisasContext *s) } #ifndef CONFIG_USER_ONLY - if (s->base.tb->flags & FLAG_MASK_PER) { - /* An exception might be triggered, save PSW if not already done. 
*/ - if (ret == DISAS_NEXT || ret == DISAS_TOO_MANY) { + if (s->base.tb->flags & FLAG_MASK_PER_IFETCH) { + switch (ret) { + case DISAS_TOO_MANY: + s->base.is_jmp = DISAS_PC_CC_UPDATED; + /* fall through */ + case DISAS_NEXT: tcg_gen_movi_i64(psw_addr, s->pc_tmp); + break; + default: + break; } - - /* Call the helper to check for a possible PER exception. */ + update_cc_op(s); gen_helper_per_check_exception(tcg_env); } #endif @@ -6454,7 +6392,7 @@ static void s390x_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) dc->cc_op = CC_OP_DYNAMIC; dc->ex_value = dc->base.tb->cs_base; - dc->exit_to_mainloop = (dc->base.tb->flags & FLAG_MASK_PER) || dc->ex_value; + dc->exit_to_mainloop = dc->ex_value; } static void s390x_tr_tb_start(DisasContextBase *db, CPUState *cs) @@ -6472,7 +6410,7 @@ static void s390x_tr_insn_start(DisasContextBase *dcbase, CPUState *cs) static target_ulong get_next_pc(CPUS390XState *env, DisasContext *s, uint64_t pc) { - uint64_t insn = cpu_lduw_code(env, pc); + uint64_t insn = translator_lduw(env, &s->base, pc); return pc + get_ilen((insn >> 8) & 0xff); } @@ -6520,18 +6458,18 @@ static void s390x_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void s390x_tr_disas_log(const DisasContextBase *dcbase, +static bool s390x_tr_disas_log(const DisasContextBase *dcbase, CPUState *cs, FILE *logfile) { DisasContext *dc = container_of(dcbase, DisasContext, base); if (unlikely(dc->ex_value)) { - /* ??? Unfortunately target_disas can't use host memory. */ - fprintf(logfile, "IN: EXECUTE %016" PRIx64, dc->ex_value); - } else { - fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); - target_disas(logfile, cs, dc->base.pc_first, dc->base.tb->size); + /* The ex_value has been recorded with translator_fake_ld. */ + fprintf(logfile, "IN: EXECUTE\n"); + target_disas(logfile, cs, &dc->base); + return true; } + return false; } static const TranslatorOps s390x_tr_ops = { diff --git a/target/sh4/Kconfig b/target/sh4/Kconfig index 2397c86028..93b92f1e48 100644 --- a/target/sh4/Kconfig +++ b/target/sh4/Kconfig @@ -1,2 +1,4 @@ config SH4 bool + # needed for sh_intc_get_pending_vector + select SH_INTC diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c index 43e35ec2ca..618aa7154e 100644 --- a/target/sh4/cpu.c +++ b/target/sh4/cpu.c @@ -47,7 +47,7 @@ static void superh_cpu_synchronize_from_tb(CPUState *cs, { SuperHCPU *cpu = SUPERH_CPU(cs); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu->env.pc = tb->pc; cpu->env.flags = tb->flags & TB_FLAG_ENVFLAGS_MASK; } @@ -74,7 +74,7 @@ static bool superh_io_recompile_replay_branch(CPUState *cs, CPUSH4State *env = cpu_env(cs); if ((env->flags & (TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND)) - && !(cs->tcg_cflags & CF_PCREL) && env->pc != tb->pc) { + && !tcg_cflags_has(cs, CF_PCREL) && env->pc != tb->pc) { env->pc -= 2; env->flags &= ~(TB_FLAG_DELAY_SLOT | TB_FLAG_DELAY_SLOT_COND); return true; diff --git a/target/sh4/helper.c b/target/sh4/helper.c index 7c6f9d374a..6702910627 100644 --- a/target/sh4/helper.c +++ b/target/sh4/helper.c @@ -21,6 +21,7 @@ #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/log.h" #if !defined(CONFIG_USER_ONLY) diff --git a/target/sh4/translate.c b/target/sh4/translate.c index ebb6c901bf..53b092175d 100644 --- a/target/sh4/translate.c +++ b/target/sh4/translate.c @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" 
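The addv/subv rewrite in the sh4/translate.c hunks just below derives the T (overflow) bit from XORs of the operands and the result: for addition, T is set exactly when both operands have the same sign and the result's sign differs, which is what the andc in the generated TCG ops encodes. A plain-C illustration of that branch-free check (not the TCG code itself):

    #include <stdint.h>
    #include <stdio.h>

    /* Bit 31 of (~(rm ^ rn) & (result ^ rn)) is set iff rm and rn have the
     * same sign while the sum's sign differs from them, i.e. signed overflow. */
    static uint32_t addv_t_bit(uint32_t rm, uint32_t rn)
    {
        uint32_t result = rn + rm;
        return (~(rm ^ rn) & (result ^ rn)) >> 31;
    }

    int main(void)
    {
        printf("%u\n", addv_t_bit(0x7fffffffu, 1u)); /* 1: positive overflow */
        printf("%u\n", addv_t_bit(1u, 1u));          /* 0: no overflow */
        return 0;
    }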
#include "exec/helper-proto.h" @@ -705,16 +704,20 @@ static void _decode_opc(DisasContext * ctx) return; case 0x300f: /* addv Rm,Rn */ { - TCGv t0, t1, t2; - t0 = tcg_temp_new(); - tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8)); + TCGv Rn = REG(B11_8); + TCGv Rm = REG(B7_4); + TCGv result, t1, t2; + + result = tcg_temp_new(); t1 = tcg_temp_new(); - tcg_gen_xor_i32(t1, t0, REG(B11_8)); t2 = tcg_temp_new(); - tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8)); + tcg_gen_add_i32(result, Rm, Rn); + /* T = ((Rn ^ Rm) & (Result ^ Rn)) >> 31 */ + tcg_gen_xor_i32(t1, result, Rn); + tcg_gen_xor_i32(t2, Rm, Rn); tcg_gen_andc_i32(cpu_sr_t, t1, t2); tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); - tcg_gen_mov_i32(REG(B7_4), t0); + tcg_gen_mov_i32(Rn, result); } return; case 0x2009: /* and Rm,Rn */ @@ -929,16 +932,20 @@ static void _decode_opc(DisasContext * ctx) return; case 0x300b: /* subv Rm,Rn */ { - TCGv t0, t1, t2; - t0 = tcg_temp_new(); - tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4)); + TCGv Rn = REG(B11_8); + TCGv Rm = REG(B7_4); + TCGv result, t1, t2; + + result = tcg_temp_new(); t1 = tcg_temp_new(); - tcg_gen_xor_i32(t1, t0, REG(B7_4)); t2 = tcg_temp_new(); - tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4)); + tcg_gen_sub_i32(result, Rn, Rm); + /* T = ((Rn ^ Rm) & (Result ^ Rn)) >> 31 */ + tcg_gen_xor_i32(t1, result, Rn); + tcg_gen_xor_i32(t2, Rn, Rm); tcg_gen_and_i32(t1, t1, t2); tcg_gen_shri_i32(cpu_sr_t, t1, 31); - tcg_gen_mov_i32(REG(B11_8), t0); + tcg_gen_mov_i32(Rn, result); } return; case 0x2008: /* tst Rm,Rn */ @@ -2181,6 +2188,7 @@ static void decode_gusa(DisasContext *ctx, CPUSH4State *env) */ for (i = 1; i < max_insns; ++i) { tcg_gen_insn_start(pc + i * 2, ctx->envflags); + ctx->base.insn_start = tcg_last_op(); } } #endif @@ -2301,20 +2309,12 @@ static void sh4_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void sh4_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cs, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cs, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps sh4_tr_ops = { .init_disas_context = sh4_tr_init_disas_context, .tb_start = sh4_tr_tb_start, .insn_start = sh4_tr_insn_start, .translate_insn = sh4_tr_translate_insn, .tb_stop = sh4_tr_tb_stop, - .disas_log = sh4_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/sparc/cpu-param.h b/target/sparc/cpu-param.h index cb11980404..82293fb844 100644 --- a/target/sparc/cpu-param.h +++ b/target/sparc/cpu-param.h @@ -23,4 +23,27 @@ # define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif +/* + * From Oracle SPARC Architecture 2015: + * + * Compatibility notes: The PSO memory model described in SPARC V8 and + * SPARC V9 compatibility architecture specifications was never implemented + * in a SPARC V9 implementation and is not included in the Oracle SPARC + * Architecture specification. + * + * The RMO memory model described in the SPARC V9 specification was + * implemented in some non-Sun SPARC V9 implementations, but is not + * directly supported in Oracle SPARC Architecture 2015 implementations. + * + * Therefore always use TSO in QEMU. + * + * D.5 Specification of Partial Store Order (PSO) + * ... [loads] are followed by an implied MEMBAR #LoadLoad | #LoadStore. + * + * D.6 Specification of Total Store Order (TSO) + * ... PSO with the additional requirement that all [stores] are followed + * by an implied MEMBAR #StoreStore. 
+ */ +#define TCG_GUEST_DEFAULT_MO (TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST) + #endif diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 485d416925..5be1592e66 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -206,7 +206,7 @@ void cpu_sparc_set_id(CPUSPARCState *env, unsigned int cpu) static const sparc_def_t sparc_defs[] = { #ifdef TARGET_SPARC64 { - .name = "Fujitsu Sparc64", + .name = "Fujitsu-Sparc64", .iu_version = ((0x04ULL << 48) | (0x02ULL << 32) | (0ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -215,7 +215,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Fujitsu Sparc64 III", + .name = "Fujitsu-Sparc64-III", .iu_version = ((0x04ULL << 48) | (0x03ULL << 32) | (0ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -224,7 +224,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Fujitsu Sparc64 IV", + .name = "Fujitsu-Sparc64-IV", .iu_version = ((0x04ULL << 48) | (0x04ULL << 32) | (0ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -233,7 +233,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Fujitsu Sparc64 V", + .name = "Fujitsu-Sparc64-V", .iu_version = ((0x04ULL << 48) | (0x05ULL << 32) | (0x51ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -242,7 +242,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI UltraSparc I", + .name = "TI-UltraSparc-I", .iu_version = ((0x17ULL << 48) | (0x10ULL << 32) | (0x40ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -251,7 +251,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI UltraSparc II", + .name = "TI-UltraSparc-II", .iu_version = ((0x17ULL << 48) | (0x11ULL << 32) | (0x20ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -260,7 +260,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI UltraSparc IIi", + .name = "TI-UltraSparc-IIi", .iu_version = ((0x17ULL << 48) | (0x12ULL << 32) | (0x91ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -269,7 +269,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI UltraSparc IIe", + .name = "TI-UltraSparc-IIe", .iu_version = ((0x17ULL << 48) | (0x13ULL << 32) | (0x14ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -278,7 +278,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Sun UltraSparc III", + .name = "Sun-UltraSparc-III", .iu_version = ((0x3eULL << 48) | (0x14ULL << 32) | (0x34ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -287,7 +287,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Sun UltraSparc III Cu", + .name = "Sun-UltraSparc-III-Cu", .iu_version = ((0x3eULL << 48) | (0x15ULL << 32) | (0x41ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_3, @@ -296,7 +296,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Sun UltraSparc IIIi", + .name = "Sun-UltraSparc-IIIi", .iu_version = ((0x3eULL << 48) | (0x16ULL << 32) | (0x34ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -305,7 +305,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Sun UltraSparc IV", + .name = 
"Sun-UltraSparc-IV", .iu_version = ((0x3eULL << 48) | (0x18ULL << 32) | (0x31ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_4, @@ -314,7 +314,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Sun UltraSparc IV+", + .name = "Sun-UltraSparc-IV-plus", .iu_version = ((0x3eULL << 48) | (0x19ULL << 32) | (0x22ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -323,7 +323,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES | CPU_FEATURE_CMT, }, { - .name = "Sun UltraSparc IIIi+", + .name = "Sun-UltraSparc-IIIi-plus", .iu_version = ((0x3eULL << 48) | (0x22ULL << 32) | (0ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_3, @@ -332,7 +332,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Sun UltraSparc T1", + .name = "Sun-UltraSparc-T1", /* defined in sparc_ifu_fdp.v and ctu.h */ .iu_version = ((0x3eULL << 48) | (0x23ULL << 32) | (0x02ULL << 24)), .fpu_version = 0x00000000, @@ -343,7 +343,7 @@ static const sparc_def_t sparc_defs[] = { | CPU_FEATURE_GL, }, { - .name = "Sun UltraSparc T2", + .name = "Sun-UltraSparc-T2", /* defined in tlu_asi_ctl.v and n2_revid_cust.v */ .iu_version = ((0x3eULL << 48) | (0x24ULL << 32) | (0x02ULL << 24)), .fpu_version = 0x00000000, @@ -354,7 +354,7 @@ static const sparc_def_t sparc_defs[] = { | CPU_FEATURE_GL, }, { - .name = "NEC UltraSparc I", + .name = "NEC-UltraSparc-I", .iu_version = ((0x22ULL << 48) | (0x10ULL << 32) | (0x40ULL << 24)), .fpu_version = 0x00000000, .mmu_version = mmu_us_12, @@ -364,7 +364,7 @@ static const sparc_def_t sparc_defs[] = { }, #else { - .name = "Fujitsu MB86904", + .name = "Fujitsu-MB86904", .iu_version = 0x04 << 24, /* Impl 0, ver 4 */ .fpu_version = 4 << FSR_VER_SHIFT, /* FPU version 4 (Meiko) */ .mmu_version = 0x04 << 24, /* Impl 0, ver 4 */ @@ -377,7 +377,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "Fujitsu MB86907", + .name = "Fujitsu-MB86907", .iu_version = 0x05 << 24, /* Impl 0, ver 5 */ .fpu_version = 4 << FSR_VER_SHIFT, /* FPU version 4 (Meiko) */ .mmu_version = 0x05 << 24, /* Impl 0, ver 5 */ @@ -390,7 +390,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI MicroSparc I", + .name = "TI-MicroSparc-I", .iu_version = 0x41000000, .fpu_version = 4 << FSR_VER_SHIFT, .mmu_version = 0x41000000, @@ -403,7 +403,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_FEATURE_MUL | CPU_FEATURE_DIV, }, { - .name = "TI MicroSparc II", + .name = "TI-MicroSparc-II", .iu_version = 0x42000000, .fpu_version = 4 << FSR_VER_SHIFT, .mmu_version = 0x02000000, @@ -416,7 +416,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI MicroSparc IIep", + .name = "TI-MicroSparc-IIep", .iu_version = 0x42000000, .fpu_version = 4 << FSR_VER_SHIFT, .mmu_version = 0x04000000, @@ -429,7 +429,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI SuperSparc 40", /* STP1020NPGA */ + .name = "TI-SuperSparc-40", /* STP1020NPGA */ .iu_version = 0x41000000, /* SuperSPARC 2.x */ .fpu_version = 0 << FSR_VER_SHIFT, .mmu_version = 0x00000800, /* SuperSPARC 2.x, no MXCC */ @@ -442,7 +442,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI SuperSparc 50", /* STP1020PGA */ + .name = "TI-SuperSparc-50", /* STP1020PGA */ .iu_version = 0x40000000, /* SuperSPARC 3.x */ 
.fpu_version = 0 << FSR_VER_SHIFT, .mmu_version = 0x01000800, /* SuperSPARC 3.x, no MXCC */ @@ -455,7 +455,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI SuperSparc 51", + .name = "TI-SuperSparc-51", .iu_version = 0x40000000, /* SuperSPARC 3.x */ .fpu_version = 0 << FSR_VER_SHIFT, .mmu_version = 0x01000000, /* SuperSPARC 3.x, MXCC */ @@ -469,7 +469,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI SuperSparc 60", /* STP1020APGA */ + .name = "TI-SuperSparc-60", /* STP1020APGA */ .iu_version = 0x40000000, /* SuperSPARC 3.x */ .fpu_version = 0 << FSR_VER_SHIFT, .mmu_version = 0x01000800, /* SuperSPARC 3.x, no MXCC */ @@ -482,7 +482,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI SuperSparc 61", + .name = "TI-SuperSparc-61", .iu_version = 0x44000000, /* SuperSPARC 3.x */ .fpu_version = 0 << FSR_VER_SHIFT, .mmu_version = 0x01000000, /* SuperSPARC 3.x, MXCC */ @@ -496,7 +496,7 @@ static const sparc_def_t sparc_defs[] = { .features = CPU_DEFAULT_FEATURES, }, { - .name = "TI SuperSparc II", + .name = "TI-SuperSparc-II", .iu_version = 0x40000000, /* SuperSPARC II 1.x */ .fpu_version = 0 << FSR_VER_SHIFT, .mmu_version = 0x08000000, /* SuperSPARC II 1.x, MXCC */ @@ -702,7 +702,7 @@ static void sparc_cpu_synchronize_from_tb(CPUState *cs, { SPARCCPU *cpu = SPARC_CPU(cs); - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu->env.pc = tb->pc; cpu->env.npc = tb->cs_base; } @@ -762,6 +762,16 @@ static ObjectClass *sparc_cpu_class_by_name(const char *cpu_model) char *typename; typename = sparc_cpu_type_name(cpu_model); + + /* Fix up legacy names with '+' in it */ + if (g_str_equal(typename, SPARC_CPU_TYPE_NAME("Sun-UltraSparc-IV+"))) { + g_free(typename); + typename = g_strdup(SPARC_CPU_TYPE_NAME("Sun-UltraSparc-IV-plus")); + } else if (g_str_equal(typename, SPARC_CPU_TYPE_NAME("Sun-UltraSparc-IIIi+"))) { + g_free(typename); + typename = g_strdup(SPARC_CPU_TYPE_NAME("Sun-UltraSparc-IIIi-plus")); + } + oc = object_class_by_name(typename); g_free(typename); return oc; diff --git a/target/sparc/cpu.h b/target/sparc/cpu.h index f3cdd17c62..dfd9512a21 100644 --- a/target/sparc/cpu.h +++ b/target/sparc/cpu.h @@ -6,29 +6,6 @@ #include "exec/cpu-defs.h" #include "qemu/cpu-float.h" -/* - * From Oracle SPARC Architecture 2015: - * - * Compatibility notes: The PSO memory model described in SPARC V8 and - * SPARC V9 compatibility architecture specifications was never implemented - * in a SPARC V9 implementation and is not included in the Oracle SPARC - * Architecture specification. - * - * The RMO memory model described in the SPARC V9 specification was - * implemented in some non-Sun SPARC V9 implementations, but is not - * directly supported in Oracle SPARC Architecture 2015 implementations. - * - * Therefore always use TSO in QEMU. - * - * D.5 Specification of Partial Store Order (PSO) - * ... [loads] are followed by an implied MEMBAR #LoadLoad | #LoadStore. - * - * D.6 Specification of Total Store Order (TSO) - * ... PSO with the additional requirement that all [stores] are followed - * by an implied MEMBAR #StoreStore. 
- */ -#define TCG_GUEST_DEFAULT_MO (TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST) - #if !defined(TARGET_SPARC64) #define TARGET_DPREGS 16 #define TARGET_FCCREGS 1 diff --git a/target/sparc/gdbstub.c b/target/sparc/gdbstub.c index 07ea81ab5f..ec0036e9ef 100644 --- a/target/sparc/gdbstub.c +++ b/target/sparc/gdbstub.c @@ -108,7 +108,7 @@ int sparc_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) SPARCCPU *cpu = SPARC_CPU(cs); CPUSPARCState *env = &cpu->env; #if defined(TARGET_ABI32) - abi_ulong tmp; + uint32_t tmp; tmp = ldl_p(mem_buf); #else diff --git a/target/sparc/helper.h b/target/sparc/helper.h index b8087d0d2b..97fbf6f66c 100644 --- a/target/sparc/helper.h +++ b/target/sparc/helper.h @@ -94,15 +94,12 @@ DEF_HELPER_FLAGS_2(fstox, TCG_CALL_NO_WG, s64, env, f32) DEF_HELPER_FLAGS_2(fdtox, TCG_CALL_NO_WG, s64, env, f64) DEF_HELPER_FLAGS_2(fqtox, TCG_CALL_NO_WG, s64, env, i128) -DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fmul8x16al, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fmul8x16au, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_2(fpmerge, TCG_CALL_NO_RWG_SE, i64, i32, i32) +DEF_HELPER_FLAGS_2(fmul8x16, TCG_CALL_NO_RWG_SE, i64, i32, i64) +DEF_HELPER_FLAGS_2(fmul8x16a, TCG_CALL_NO_RWG_SE, i64, i32, s32) DEF_HELPER_FLAGS_2(fmul8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64) DEF_HELPER_FLAGS_2(fmul8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fmuld8sux16, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fmuld8ulx16, TCG_CALL_NO_RWG_SE, i64, i64, i64) -DEF_HELPER_FLAGS_2(fexpand, TCG_CALL_NO_RWG_SE, i64, i64, i64) +DEF_HELPER_FLAGS_1(fexpand, TCG_CALL_NO_RWG_SE, i64, i32) DEF_HELPER_FLAGS_3(pdist, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) DEF_HELPER_FLAGS_2(fpack16, TCG_CALL_NO_RWG_SE, i32, i64, i64) DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64) diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode index 2d26404cb2..e2d8a07dc4 100644 --- a/target/sparc/insns.decode +++ b/target/sparc/insns.decode @@ -352,7 +352,7 @@ FCMPEq 10 000 cc:2 110101 rs1:5 0 0101 0111 rs2:5 FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 ..... @r_r_r FPMERGE 10 ..... 110110 ..... 0 0100 1011 ..... @r_r_r BSHUFFLE 10 ..... 110110 ..... 0 0100 1100 ..... @r_r_r - FEXPAND 10 ..... 110110 ..... 0 0100 1101 ..... @r_r_r + FEXPAND 10 ..... 110110 00000 0 0100 1101 ..... @r_r2 FSRCd 10 ..... 110110 ..... 0 0111 0100 00000 @r_r1 # FSRC1d FSRCs 10 ..... 110110 ..... 
0 0111 0101 00000 @r_r1 # FSRC1s diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c index 2846a86cc4..7bdf99e0c0 100644 --- a/target/sparc/ldst_helper.c +++ b/target/sparc/ldst_helper.c @@ -23,6 +23,7 @@ #include "tcg/tcg.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "exec/cpu_ldst.h" #include "asi.h" diff --git a/target/sparc/mmu_helper.c b/target/sparc/mmu_helper.c index ad1591d9fd..9ff06026b8 100644 --- a/target/sparc/mmu_helper.c +++ b/target/sparc/mmu_helper.c @@ -21,6 +21,7 @@ #include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "qemu/qemu-print.h" #include "trace.h" diff --git a/target/sparc/translate.c b/target/sparc/translate.c index 571b3e3f03..dca072888a 100644 --- a/target/sparc/translate.c +++ b/target/sparc/translate.c @@ -21,7 +21,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" @@ -45,6 +44,7 @@ # define gen_helper_clear_softint(E, S) qemu_build_not_reached() # define gen_helper_done(E) qemu_build_not_reached() # define gen_helper_flushw(E) qemu_build_not_reached() +# define gen_helper_fmul8x16a(D, S1, S2) qemu_build_not_reached() # define gen_helper_rdccr(D, E) qemu_build_not_reached() # define gen_helper_rdcwp(D, E) qemu_build_not_reached() # define gen_helper_restored(E) qemu_build_not_reached() @@ -72,11 +72,7 @@ # define gen_helper_fexpand ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8sux16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8ulx16 ({ qemu_build_not_reached(); NULL; }) -# define gen_helper_fmul8x16al ({ qemu_build_not_reached(); NULL; }) -# define gen_helper_fmul8x16au ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fmul8x16 ({ qemu_build_not_reached(); NULL; }) -# define gen_helper_fmuld8sux16 ({ qemu_build_not_reached(); NULL; }) -# define gen_helper_fmuld8ulx16 ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fpmerge ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fqtox ({ qemu_build_not_reached(); NULL; }) # define gen_helper_fstox ({ qemu_build_not_reached(); NULL; }) @@ -719,6 +715,60 @@ static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2) #endif } +static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) +{ + tcg_gen_ext16s_i32(src2, src2); + gen_helper_fmul8x16a(dst, src1, src2); +} + +static void gen_op_fmul8x16au(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) +{ + tcg_gen_sari_i32(src2, src2, 16); + gen_helper_fmul8x16a(dst, src1, src2); +} + +static void gen_op_fmuld8ulx16(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + tcg_gen_ext8u_i32(t0, src1); + tcg_gen_ext16s_i32(t1, src2); + tcg_gen_mul_i32(t0, t0, t1); + + tcg_gen_extract_i32(t1, src1, 16, 8); + tcg_gen_sextract_i32(t2, src2, 16, 16); + tcg_gen_mul_i32(t1, t1, t2); + + tcg_gen_concat_i32_i64(dst, t0, t1); +} + +static void gen_op_fmuld8sux16(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2) +{ + TCGv_i32 t0 = tcg_temp_new_i32(); + TCGv_i32 t1 = tcg_temp_new_i32(); + TCGv_i32 t2 = tcg_temp_new_i32(); + + /* + * The insn description talks about extracting the upper 8 bits + * of the signed 16-bit input rs1, performing the multiply, then + * shifting left by 8 bits. Instead, zap the lower 8 bits of + * the rs1 input, which avoids the need for two shifts. 
+ */ + tcg_gen_ext16s_i32(t0, src1); + tcg_gen_andi_i32(t0, t0, ~0xff); + tcg_gen_ext16s_i32(t1, src2); + tcg_gen_mul_i32(t0, t0, t1); + + tcg_gen_sextract_i32(t1, src1, 16, 16); + tcg_gen_andi_i32(t1, t1, ~0xff); + tcg_gen_sextract_i32(t2, src2, 16, 16); + tcg_gen_mul_i32(t1, t1, t2); + + tcg_gen_concat_i32_i64(dst, t0, t1); +} + static void finishing_insn(DisasContext *dc) { /* @@ -4358,6 +4408,25 @@ TRANS(FSQRTd, ALL, do_env_dd, a, gen_helper_fsqrtd) TRANS(FxTOd, 64, do_env_dd, a, gen_helper_fxtod) TRANS(FdTOx, 64, do_env_dd, a, gen_helper_fdtox) +static bool do_df(DisasContext *dc, arg_r_r *a, + void (*func)(TCGv_i64, TCGv_i32)) +{ + TCGv_i64 dst; + TCGv_i32 src; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = tcg_temp_new_i64(); + src = gen_load_fpr_F(dc, a->rs); + func(dst, src); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(FEXPAND, VIS1, do_df, a, gen_helper_fexpand) + static bool do_env_df(DisasContext *dc, arg_r_r *a, void (*func)(TCGv_i64, TCGv_env, TCGv_i32)) { @@ -4564,6 +4633,50 @@ TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs) TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls) TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs) +static bool do_dff(DisasContext *dc, arg_r_r_r *a, + void (*func)(TCGv_i64, TCGv_i32, TCGv_i32)) +{ + TCGv_i64 dst; + TCGv_i32 src1, src2; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = gen_dest_fpr_D(dc, a->rd); + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_F(dc, a->rs2); + func(dst, src1, src2); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(FMUL8x16AU, VIS1, do_dff, a, gen_op_fmul8x16au) +TRANS(FMUL8x16AL, VIS1, do_dff, a, gen_op_fmul8x16al) +TRANS(FMULD8SUx16, VIS1, do_dff, a, gen_op_fmuld8sux16) +TRANS(FMULD8ULx16, VIS1, do_dff, a, gen_op_fmuld8ulx16) +TRANS(FPMERGE, VIS1, do_dff, a, gen_helper_fpmerge) + +static bool do_dfd(DisasContext *dc, arg_r_r_r *a, + void (*func)(TCGv_i64, TCGv_i32, TCGv_i64)) +{ + TCGv_i64 dst, src2; + TCGv_i32 src1; + + if (gen_trap_ifnofpu(dc)) { + return true; + } + + dst = gen_dest_fpr_D(dc, a->rd); + src1 = gen_load_fpr_F(dc, a->rs1); + src2 = gen_load_fpr_D(dc, a->rs2); + func(dst, src1, src2); + gen_store_fpr_D(dc, a->rd, dst); + return advance_pc(dc); +} + +TRANS(FMUL8x16, VIS1, do_dfd, a, gen_helper_fmul8x16) + static bool do_ddd(DisasContext *dc, arg_r_r_r *a, void (*func)(TCGv_i64, TCGv_i64, TCGv_i64)) { @@ -4581,15 +4694,8 @@ static bool do_ddd(DisasContext *dc, arg_r_r_r *a, return advance_pc(dc); } -TRANS(FMUL8x16, VIS1, do_ddd, a, gen_helper_fmul8x16) -TRANS(FMUL8x16AU, VIS1, do_ddd, a, gen_helper_fmul8x16au) -TRANS(FMUL8x16AL, VIS1, do_ddd, a, gen_helper_fmul8x16al) TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16) TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16) -TRANS(FMULD8SUx16, VIS1, do_ddd, a, gen_helper_fmuld8sux16) -TRANS(FMULD8ULx16, VIS1, do_ddd, a, gen_helper_fmuld8ulx16) -TRANS(FPMERGE, VIS1, do_ddd, a, gen_helper_fpmerge) -TRANS(FEXPAND, VIS1, do_ddd, a, gen_helper_fexpand) TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64) TRANS(FPADD32, VIS1, do_ddd, a, tcg_gen_vec_add32_i64) @@ -5042,20 +5148,12 @@ static void sparc_tr_tb_stop(DisasContextBase *dcbase, CPUState *cs) } } -static void sparc_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps sparc_tr_ops = { .init_disas_context = 
sparc_tr_init_disas_context, .tb_start = sparc_tr_tb_start, .insn_start = sparc_tr_insn_start, .translate_insn = sparc_tr_translate_insn, .tb_stop = sparc_tr_tb_stop, - .disas_log = sparc_tr_disas_log, }; void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int *max_insns, diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c index 7763b16c24..e15c6bb34e 100644 --- a/target/sparc/vis_helper.c +++ b/target/sparc/vis_helper.c @@ -44,6 +44,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #if HOST_BIG_ENDIAN #define VIS_B64(n) b[7 - (n)] +#define VIS_SB64(n) sb[7 - (n)] #define VIS_W64(n) w[3 - (n)] #define VIS_SW64(n) sw[3 - (n)] #define VIS_L64(n) l[1 - (n)] @@ -51,6 +52,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) #define VIS_W32(n) w[1 - (n)] #else #define VIS_B64(n) b[n] +#define VIS_SB64(n) sb[n] #define VIS_W64(n) w[n] #define VIS_SW64(n) sw[n] #define VIS_L64(n) l[n] @@ -60,6 +62,7 @@ target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize) typedef union { uint8_t b[8]; + int8_t sb[8]; uint16_t w[4]; int16_t sw[4]; uint32_t l[2]; @@ -74,94 +77,60 @@ typedef union { float32 f; } VIS32; -uint64_t helper_fpmerge(uint64_t src1, uint64_t src2) +uint64_t helper_fpmerge(uint32_t src1, uint32_t src2) { - VIS64 s, d; + VIS32 s1, s2; + VIS64 d; - s.ll = src1; - d.ll = src2; + s1.l = src1; + s2.l = src2; + d.ll = 0; - /* Reverse calculation order to handle overlap */ - d.VIS_B64(7) = s.VIS_B64(3); - d.VIS_B64(6) = d.VIS_B64(3); - d.VIS_B64(5) = s.VIS_B64(2); - d.VIS_B64(4) = d.VIS_B64(2); - d.VIS_B64(3) = s.VIS_B64(1); - d.VIS_B64(2) = d.VIS_B64(1); - d.VIS_B64(1) = s.VIS_B64(0); - /* d.VIS_B64(0) = d.VIS_B64(0); */ + d.VIS_B64(7) = s1.VIS_B32(3); + d.VIS_B64(6) = s2.VIS_B32(3); + d.VIS_B64(5) = s1.VIS_B32(2); + d.VIS_B64(4) = s2.VIS_B32(2); + d.VIS_B64(3) = s1.VIS_B32(1); + d.VIS_B64(2) = s2.VIS_B32(1); + d.VIS_B64(1) = s1.VIS_B32(0); + d.VIS_B64(0) = s2.VIS_B32(0); return d.ll; } -uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2) +static inline int do_ms16b(int x, int y) { - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL - - return d.ll; + return ((x * y) + 0x80) >> 8; } -uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2) +uint64_t helper_fmul8x16(uint32_t src1, uint64_t src2) { - VIS64 s, d; - uint32_t tmp; + VIS64 d; + VIS32 s; - s.ll = src1; + s.l = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), d.VIS_SW64(3)); return d.ll; } -uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) +uint64_t helper_fmul8x16a(uint32_t src1, int32_t src2) { - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; + VIS32 s; + VIS64 d; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; + s.l = src1; + d.ll = 0; - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef 
PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B32(0), src2); + d.VIS_W64(1) = do_ms16b(s.VIS_B32(1), src2); + d.VIS_W64(2) = do_ms16b(s.VIS_B32(2), src2); + d.VIS_W64(3) = do_ms16b(s.VIS_B32(3), src2); return d.ll; } @@ -169,23 +138,14 @@ uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2) uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) { VIS64 s, d; - uint32_t tmp; s.ll = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_SB64(1), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_SB64(3), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_SB64(5), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_SB64(7), d.VIS_SW64(3)); return d.ll; } @@ -193,80 +153,25 @@ uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2) uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2) { VIS64 s, d; - uint32_t tmp; s.ll = src1; d.ll = src2; -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_W64(r) = tmp >> 8; - - PMUL(0); - PMUL(1); - PMUL(2); - PMUL(3); -#undef PMUL - - return d.ll; -} - -uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2) -{ - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_L64(r) = tmp; - - /* Reverse calculation order to handle overlap */ - PMUL(1); - PMUL(0); -#undef PMUL + d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0)); + d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1)); + d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2)); + d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3)); return d.ll; } -uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2) -{ - VIS64 s, d; - uint32_t tmp; - - s.ll = src1; - d.ll = src2; - -#define PMUL(r) \ - tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2)); \ - if ((tmp & 0xff) > 0x7f) { \ - tmp += 0x100; \ - } \ - d.VIS_L64(r) = tmp; - - /* Reverse calculation order to handle overlap */ - PMUL(1); - PMUL(0); -#undef PMUL - - return d.ll; -} - -uint64_t helper_fexpand(uint64_t src1, uint64_t src2) +uint64_t helper_fexpand(uint32_t src2) { VIS32 s; VIS64 d; - s.l = (uint32_t)src1; - d.ll = src2; + s.l = src2; + d.ll = 0; d.VIS_W64(0) = s.VIS_B32(0) << 4; d.VIS_W64(1) = s.VIS_B32(1) << 4; d.VIS_W64(2) = s.VIS_B32(2) << 4; diff --git a/target/target-common.c b/target/target-common.c deleted file mode 100644 index 903b10cfe4..0000000000 --- a/target/target-common.c +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#include "qemu/osdep.h" - -#include "cpu.h" -#include "exec/target_page.h" - -int qemu_target_page_mask(void) -{ - return TARGET_PAGE_MASK; -} diff --git a/target/tricore/cpu.c b/target/tricore/cpu.c index 8f9b72c3a0..bdefb84511 100644 --- a/target/tricore/cpu.c +++ b/target/tricore/cpu.c @@ -47,7 +47,7 @@ static vaddr tricore_cpu_get_pc(CPUState *cs) static void tricore_cpu_synchronize_from_tb(CPUState *cs, const TranslationBlock *tb) { - tcg_debug_assert(!(cs->tcg_cflags & CF_PCREL)); + tcg_debug_assert(!tcg_cflags_has(cs, CF_PCREL)); cpu_env(cs)->PC = tb->pc; } diff --git a/target/tricore/gdbstub.c b/target/tricore/gdbstub.c index f9309c5e27..29a70051ff 100644 --- a/target/tricore/gdbstub.c +++ 
b/target/tricore/gdbstub.c @@ -19,6 +19,7 @@ #include "qemu/osdep.h" #include "gdbstub/helpers.h" +#include "cpu.h" #define LCX_REGNUM 32 diff --git a/target/tricore/helper.c b/target/tricore/helper.c index 76bd226370..7014255f77 100644 --- a/target/tricore/helper.c +++ b/target/tricore/helper.c @@ -20,6 +20,7 @@ #include "hw/registerfields.h" #include "cpu.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #include "fpu/softfloat-helpers.h" #include "qemu/qemu-print.h" diff --git a/target/tricore/translate.c b/target/tricore/translate.c index c45e1d992e..a46a03e1fd 100644 --- a/target/tricore/translate.c +++ b/target/tricore/translate.c @@ -20,7 +20,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "disas/disas.h" #include "exec/exec-all.h" #include "tcg/tcg-op.h" #include "exec/cpu_ldst.h" @@ -8453,20 +8452,12 @@ static void tricore_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void tricore_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps tricore_tr_ops = { .init_disas_context = tricore_tr_init_disas_context, .tb_start = tricore_tr_tb_start, .insn_start = tricore_tr_insn_start, .translate_insn = tricore_tr_translate_insn, .tb_stop = tricore_tr_tb_stop, - .disas_log = tricore_tr_disas_log, }; diff --git a/target/xtensa/cpu-param.h b/target/xtensa/cpu-param.h index b1da0555de..0000725f2f 100644 --- a/target/xtensa/cpu-param.h +++ b/target/xtensa/cpu-param.h @@ -17,4 +17,7 @@ #define TARGET_VIRT_ADDR_SPACE_BITS 32 #endif +/* Xtensa processors have a weak memory model */ +#define TCG_GUEST_DEFAULT_MO (0) + #endif diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h index 6b8d0636d2..9f2341d856 100644 --- a/target/xtensa/cpu.h +++ b/target/xtensa/cpu.h @@ -34,9 +34,6 @@ #include "hw/clock.h" #include "xtensa-isa.h" -/* Xtensa processors have a weak memory model */ -#define TCG_GUEST_DEFAULT_MO (0) - enum { /* Additional instructions */ XTENSA_OPTION_CODE_DENSITY, diff --git a/target/xtensa/mmu_helper.c b/target/xtensa/mmu_helper.c index 47063b0a57..997b21d389 100644 --- a/target/xtensa/mmu_helper.c +++ b/target/xtensa/mmu_helper.c @@ -33,6 +33,7 @@ #include "exec/helper-proto.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" +#include "exec/page-protection.h" #define XTENSA_MPU_SEGMENT_MASK 0x0000001f #define XTENSA_MPU_ACC_RIGHTS_MASK 0x00000f00 diff --git a/target/xtensa/op_helper.c b/target/xtensa/op_helper.c index 496754ba57..028d4e0a1c 100644 --- a/target/xtensa/op_helper.c +++ b/target/xtensa/op_helper.c @@ -28,6 +28,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "exec/page-protection.h" #include "qemu/host-utils.h" #include "exec/exec-all.h" #include "qemu/atomic.h" diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index b206d57fc4..75b7bfda4c 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -32,11 +32,9 @@ #include "cpu.h" #include "exec/exec-all.h" -#include "disas/disas.h" #include "tcg/tcg-op.h" #include "qemu/log.h" #include "qemu/qemu-print.h" -#include "exec/cpu_ldst.h" #include "semihosting/semihost.h" #include "exec/translator.h" @@ -1119,7 +1117,7 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc) static inline unsigned xtensa_insn_len(CPUXtensaState *env, DisasContext *dc) { - uint8_t b0 = cpu_ldub_code(env, dc->pc); + uint8_t b0 = 
translator_ldub(env, &dc->base, dc->pc); return xtensa_op0_insn_len(dc, b0); } @@ -1221,20 +1219,12 @@ static void xtensa_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) } } -static void xtensa_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu, FILE *logfile) -{ - fprintf(logfile, "IN: %s\n", lookup_symbol(dcbase->pc_first)); - target_disas(logfile, cpu, dcbase->pc_first, dcbase->tb->size); -} - static const TranslatorOps xtensa_translator_ops = { .init_disas_context = xtensa_tr_init_disas_context, .tb_start = xtensa_tr_tb_start, .insn_start = xtensa_tr_insn_start, .translate_insn = xtensa_tr_translate_insn, .tb_stop = xtensa_tr_tb_stop, - .disas_log = xtensa_tr_disas_log, }; void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, |
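The recurring change in the vis_helper.c hunks is that the per-helper PMUL macro, which rounded the 8x16 product by conditionally adding 0x100 whenever the low byte exceeded 0x7f, is replaced by one shared do_ms16b() that rounds by unconditionally adding 0x80 before the shift. The two formulations agree modulo 16 bits. Below is a minimal standalone check of that equivalence; old_pmul() and the exhaustive loop are a hypothetical harness written for illustration, not code from the patch.

/*
 * Hypothetical standalone harness: confirm that do_ms16b() (copied from the
 * patch) produces the same 16-bit lane value as the old PMUL macro body for
 * every unsigned 8-bit pixel and signed 16-bit multiplicand.  Like QEMU
 * itself, this relies on arithmetic right shift of negative ints.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int do_ms16b(int x, int y)
{
    return ((x * y) + 0x80) >> 8;
}

/* The old PMUL macro body, spelled out as a function. */
static uint16_t old_pmul(uint8_t b, int16_t w)
{
    uint32_t tmp = (int32_t)w * (int32_t)b;

    if ((tmp & 0xff) > 0x7f) {
        tmp += 0x100;
    }
    return tmp >> 8;
}

int main(void)
{
    for (int b = 0; b < 256; b++) {
        for (int w = INT16_MIN; w <= INT16_MAX; w++) {
            assert((uint16_t)do_ms16b(b, w) == old_pmul(b, w));
        }
    }
    printf("do_ms16b matches the old PMUL rounding\n");
    return 0;
}

The same helper also serves fmul8sux16 and fmul8ulx16, which feed it the signed high byte or the unsigned low byte of each 16-bit lane instead of a pixel byte.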
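The new VIS_SB64() accessor exists so that fmul8sux16 can read the signed high byte of each lane directly, where the old macro computed it as (int32_t)VIS_SW64(r) >> 8. Those are the same value; a small standalone sketch of the identity follows (harness only, not patch code).

/*
 * Hypothetical standalone sketch: the signed high byte of a 16-bit lane,
 * as the new int8_t view (VIS_SB64) reads it, equals the old expression
 * "(int32_t)VIS_SW64(r) >> 8" for every 16-bit value.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    for (int w = INT16_MIN; w <= INT16_MAX; w++) {
        int8_t high_byte = (int8_t)(((uint16_t)w) >> 8); /* what sb[] holds */
        int32_t old_expr = (int32_t)(int16_t)w >> 8;     /* old PMUL operand */

        assert(high_byte == old_expr);
    }
    return 0;
}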
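Narrowing the sources of helper_fpmerge and helper_fexpand to 32 bits also removes the old aliasing concern ("Reverse calculation order to handle overlap"): the destination now starts from zero and cannot overlap an input lane. What remains is a plain byte scatter: fpmerge interleaves the bytes of its two 32-bit inputs, and fexpand spreads each byte of one input into a 16-bit lane shifted left by 4. Below is a standalone model of that byte movement using shifts instead of the VIS32/VIS64 unions; the concrete values in main() are illustrative only.

/*
 * Hypothetical standalone model of the byte scatter performed by the
 * reworked helpers.  "Byte n" means bits [8n+7:8n] of the value, which is
 * what the VIS_B32()/VIS_B64() macros select on either host endianness.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t model_fpmerge(uint32_t src1, uint32_t src2)
{
    uint64_t d = 0;

    for (int n = 0; n < 4; n++) {
        uint64_t b1 = (src1 >> (8 * n)) & 0xff;
        uint64_t b2 = (src2 >> (8 * n)) & 0xff;

        d |= b1 << (16 * n + 8);   /* src1 byte -> high byte of lane n */
        d |= b2 << (16 * n);       /* src2 byte -> low byte of lane n */
    }
    return d;
}

static uint64_t model_fexpand(uint32_t src2)
{
    uint64_t d = 0;

    for (int n = 0; n < 4; n++) {
        uint64_t pixel = (src2 >> (8 * n)) & 0xff;

        d |= (pixel << 4) << (16 * n);   /* each byte becomes a x16-scaled lane */
    }
    return d;
}

int main(void)
{
    assert(model_fpmerge(0x11223344, 0xaabbccdd) == 0x11aa22bb33cc44ddULL);
    assert(model_fexpand(0x80ff0102) == 0x08000ff000100020ULL);
    return 0;
}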