diff options
author | aph <none@none> | 2014-06-20 09:21:08 -0400 |
---|---|---|
committer | aph <none@none> | 2014-06-20 09:21:08 -0400 |
commit | dd63b5f874276f5bb43c0b00c3d04a18ca2af209 (patch) | |
tree | a7855b484e92b8ed69c01f3c44bac382ab6ccd85 | |
parent | ca2c3de300a07ea1a764b6517d653c24ec83aef1 (diff) |
Improve C1 performance in frame creation and ic_cache checks
-rw-r--r-- | src/cpu/aarch64/vm/aarch64.ad | 35 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp | 30 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp | 28 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp | 14 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/macroAssembler_aarch64.cpp | 44 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/macroAssembler_aarch64.hpp | 4 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp | 3 |
7 files changed, 74 insertions, 84 deletions
diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad index 9735565a0..22563e570 100644 --- a/src/cpu/aarch64/vm/aarch64.ad +++ b/src/cpu/aarch64/vm/aarch64.ad @@ -950,7 +950,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const { if (framesize == 0) { // Is this even possible? st->print("stp lr, rfp, [sp, #%d]!", -(2 * wordSize)); - } else if (framesize < (1 << 7)) { + } else if (framesize < ((1 << 9) + 2 * wordSize)) { st->print("sub sp, sp, #%d\n\t", framesize); st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize); } else { @@ -976,21 +976,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { if (C->need_stack_bang(framesize)) __ generate_stack_overflow_check(framesize); - if (framesize == 0) { - // Is this even possible? - __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); - } else if (framesize < ((1 << 9) + 2 * wordSize)) { - __ sub(sp, sp, framesize); - __ stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); - } else { - __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize))); - if (framesize < ((1 << 12) + 2 * wordSize)) - __ sub(sp, sp, framesize - 2 * wordSize); - else { - __ mov(rscratch1, framesize - 2 * wordSize); - __ sub(sp, sp, rscratch1); - } - } + __ build_frame(framesize); if (NotifySimulator) { __ notify(Assembler::method_entry); @@ -1032,7 +1018,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { if (framesize == 0) { st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize)); - } else if (framesize < (1 << 7)) { + } else if (framesize < ((1 << 9) + 2 * wordSize)) { st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize); st->print("add sp, sp, #%d\n\t", framesize); } else { @@ -1054,20 +1040,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { MacroAssembler _masm(&cbuf); int framesize = C->frame_slots() << LogBytesPerInt; - if (framesize == 0) { - __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize))); - } else if 
(framesize < ((1 << 9) + 2 * wordSize)) { - __ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize)); - __ add(sp, sp, framesize); - } else { - if (framesize < ((1 << 12) + 2 * wordSize)) - __ add(sp, sp, framesize - 2 * wordSize); - else { - __ mov(rscratch1, framesize - 2 * wordSize); - __ add(sp, sp, rscratch1); - } - __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize))); - } + __ remove_frame(framesize); if (NotifySimulator) { __ notify(Assembler::method_reentry); diff --git a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp index 1f3433460..423628e50 100644 --- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp +++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp @@ -290,23 +290,25 @@ void LIR_Assembler::osr_entry() { int LIR_Assembler::check_icache() { Register receiver = FrameMap::receiver_opr->as_register(); Register ic_klass = IC_Klass; - const int ic_cmp_size = 4 * 4; - const bool do_post_padding = VerifyOops || UseCompressedClassPointers; - if (!do_post_padding) { - // insert some nops so that the verified entry point is aligned on CodeEntryAlignment - while ((__ offset() + ic_cmp_size) % CodeEntryAlignment != 0) { - __ nop(); - } - } - int offset = __ offset(); - __ inline_cache_check(receiver, IC_Klass); - assert(__ offset() % CodeEntryAlignment == 0 || do_post_padding, "alignment must be correct"); - if (do_post_padding) { + int start_offset = __ offset(); + __ inline_cache_check(receiver, ic_klass); + + // if icache check fails, then jump to runtime routine + // Note: RECEIVER must still contain the receiver! + Label dont; + __ br(Assembler::EQ, dont); + __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); + + // We align the verified entry point unless the method body + // (including its inline cache check) will fit in a single 64-byte + // icache line. + if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) { // force alignment after the cache check. 
- // It's been verified to be aligned if !VerifyOops __ align(CodeEntryAlignment); } - return offset; + + __ bind(dont); + return start_offset; } diff --git a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp index f28d8d6e8..547ca82c1 100644 --- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp @@ -404,23 +404,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { // explicit NULL check not needed since load from [klass_offset] causes a trap // check against inline cache assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); - int start_offset = offset(); - - load_klass(rscratch1, receiver); - cmp(rscratch1, iCache); - - // if icache check fails, then jump to runtime routine - // Note: RECEIVER must still contain the receiver! - Label dont; - br(Assembler::EQ, dont); - b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); - bind(dont); - const int ic_cmp_size = 4 * 4; - assert(UseCompressedClassPointers || offset() - start_offset == ic_cmp_size, "check alignment in emit_method_entry"); + + cmp_klass(receiver, iCache, rscratch1); } -void C1_MacroAssembler::build_frame(int frame_size_in_bytes) { +void C1_MacroAssembler::build_frame(int framesize) { // If we have to make this method not-entrant we'll overwrite its // first instruction with a jump. For this action to be legal we // must ensure that this first instruction is a B, BL, NOP, BKPT, @@ -428,18 +417,15 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes) { nop(); // Make sure there is enough stack space for this method's activation. // Note that we do this before doing an enter(). 
- generate_stack_overflow_check(frame_size_in_bytes); - enter(); - sub(sp, sp, frame_size_in_bytes); // does not emit code for frame_size == 0 + generate_stack_overflow_check(framesize); + MacroAssembler::build_frame(framesize + 2 * wordSize); if (NotifySimulator) { notify(Assembler::method_entry); } } - -void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { - add(sp, sp, frame_size_in_bytes); // Does not emit code for frame_size == 0 - ldp(rfp, lr, Address(post(sp, 2 * wordSize))); +void C1_MacroAssembler::remove_frame(int framesize) { + MacroAssembler::remove_frame(framesize + 2 * wordSize); if (NotifySimulator) { notify(Assembler::method_reentry); } diff --git a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp index e35e39f0e..f53fa6433 100644 --- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp +++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp @@ -102,20 +102,6 @@ void zero_memory(Register addr, Register len, Register t1); int rsp_offset() const { return _rsp_offset; } void set_rsp_offset(int n) { _rsp_offset = n; } - // Note: NEVER push values directly, but only through following push_xxx functions; - // This helps us to track the rsp changes compared to the entry rsp (->_rsp_offset) - - void push_jint (jint i) { Unimplemented(); } - void push_oop (jobject o) { Unimplemented(); } - // Seems to always be in wordSize - void push_addr (Address a) { Unimplemented(); } - void push_reg (Register r) { Unimplemented(); } - void pop_reg (Register r) { Unimplemented(); } - - void dec_stack (int nof_words) { Unimplemented(); } - - void dec_stack_after_call (int nof_words) { Unimplemented(); } - void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN; #endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp index 3f9c85ef0..cba988f60 
100644 --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp @@ -1842,8 +1842,12 @@ void MacroAssembler::addw(Register Rd, Register Rn, RegisterOrConstant increment void MacroAssembler::reinit_heapbase() { if (UseCompressedOops) { - lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); - ldr(rheapbase, Address(rheapbase)); + if (Universe::is_fully_initialized()) { + mov(rheapbase, Universe::narrow_ptrs_base()); + } else { + lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); + ldr(rheapbase, Address(rheapbase)); + } } } @@ -3313,3 +3317,39 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byt return UseAcqRelForVolatileFields; #endif } + +void MacroAssembler::build_frame(int framesize) { + if (framesize == 0) { + // Is this even possible? + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + } else if (framesize < ((1 << 9) + 2 * wordSize)) { + sub(sp, sp, framesize); + stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); + } else { + stp(rfp, lr, Address(pre(sp, -2 * wordSize))); + if (framesize < ((1 << 12) + 2 * wordSize)) + sub(sp, sp, framesize - 2 * wordSize); + else { + mov(rscratch1, framesize - 2 * wordSize); + sub(sp, sp, rscratch1); + } + } +} + +void MacroAssembler::remove_frame(int framesize) { + if (framesize == 0) { + ldp(rfp, lr, Address(post(sp, 2 * wordSize))); + } else if (framesize < ((1 << 9) + 2 * wordSize)) { + ldp(rfp, lr, Address(sp, framesize - 2 * wordSize)); + add(sp, sp, framesize); + } else { + if (framesize < ((1 << 12) + 2 * wordSize)) + add(sp, sp, framesize - 2 * wordSize); + else { + mov(rscratch1, framesize - 2 * wordSize); + add(sp, sp, rscratch1); + } + ldp(rfp, lr, Address(post(sp, 2 * wordSize))); + } +} + diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index c3ebea34b..10ede044c 100644 --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ 
b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -151,6 +151,10 @@ class MacroAssembler: public Assembler { strw(scratch, a); } + // Frame creation and destruction shared between JITs. + void build_frame(int framesize); + void remove_frame(int framesize); + virtual void _call_Unimplemented(address call_site) { mov(rscratch2, call_site); haltsim(); diff --git a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp index 61d11f09f..f7f4814de 100644 --- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp +++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp @@ -1529,8 +1529,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, assert_different_registers(ic_reg, receiver, rscratch1); __ verify_oop(receiver); - __ load_klass(rscratch1, receiver); - __ cmp(ic_reg, rscratch1); + __ cmp_klass(receiver, ic_reg, rscratch1); __ br(Assembler::EQ, hit); __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); |