about summary refs log tree commit diff
diff options
context:
space:
mode:
author aph <none@none> 2014-06-20 09:21:08 -0400
committer aph <none@none> 2014-06-20 09:21:08 -0400
commit dd63b5f874276f5bb43c0b00c3d04a18ca2af209 (patch)
tree a7855b484e92b8ed69c01f3c44bac382ab6ccd85
parent ca2c3de300a07ea1a764b6517d653c24ec83aef1 (diff)
Improve C1 performance improvements in frame creation and ic_cache checks
-rw-r--r-- src/cpu/aarch64/vm/aarch64.ad 35
-rw-r--r-- src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp 30
-rw-r--r-- src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp 28
-rw-r--r-- src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp 14
-rw-r--r-- src/cpu/aarch64/vm/macroAssembler_aarch64.cpp 44
-rw-r--r-- src/cpu/aarch64/vm/macroAssembler_aarch64.hpp 4
-rw-r--r-- src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp 3
7 files changed, 74 insertions, 84 deletions
diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad
index 9735565a0..22563e570 100644
--- a/src/cpu/aarch64/vm/aarch64.ad
+++ b/src/cpu/aarch64/vm/aarch64.ad
@@ -950,7 +950,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
if (framesize == 0) {
// Is this even possible?
st->print("stp lr, rfp, [sp, #%d]!", -(2 * wordSize));
- } else if (framesize < (1 << 7)) {
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) {
st->print("sub sp, sp, #%d\n\t", framesize);
st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
} else {
@@ -976,21 +976,7 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
if (C->need_stack_bang(framesize))
__ generate_stack_overflow_check(framesize);
- if (framesize == 0) {
- // Is this even possible?
- __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
- } else if (framesize < ((1 << 9) + 2 * wordSize)) {
- __ sub(sp, sp, framesize);
- __ stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
- } else {
- __ stp(rfp, lr, Address(__ pre(sp, -2 * wordSize)));
- if (framesize < ((1 << 12) + 2 * wordSize))
- __ sub(sp, sp, framesize - 2 * wordSize);
- else {
- __ mov(rscratch1, framesize - 2 * wordSize);
- __ sub(sp, sp, rscratch1);
- }
- }
+ __ build_frame(framesize);
if (NotifySimulator) {
__ notify(Assembler::method_entry);
@@ -1032,7 +1018,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
if (framesize == 0) {
st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
- } else if (framesize < (1 << 7)) {
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) {
st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
st->print("add sp, sp, #%d\n\t", framesize);
} else {
@@ -1054,20 +1040,7 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
MacroAssembler _masm(&cbuf);
int framesize = C->frame_slots() << LogBytesPerInt;
- if (framesize == 0) {
- __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
- } else if (framesize < ((1 << 9) + 2 * wordSize)) {
- __ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
- __ add(sp, sp, framesize);
- } else {
- if (framesize < ((1 << 12) + 2 * wordSize))
- __ add(sp, sp, framesize - 2 * wordSize);
- else {
- __ mov(rscratch1, framesize - 2 * wordSize);
- __ add(sp, sp, rscratch1);
- }
- __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
- }
+ __ remove_frame(framesize);
if (NotifySimulator) {
__ notify(Assembler::method_reentry);
diff --git a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
index 1f3433460..423628e50 100644
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
@@ -290,23 +290,25 @@ void LIR_Assembler::osr_entry() {
int LIR_Assembler::check_icache() {
Register receiver = FrameMap::receiver_opr->as_register();
Register ic_klass = IC_Klass;
- const int ic_cmp_size = 4 * 4;
- const bool do_post_padding = VerifyOops || UseCompressedClassPointers;
- if (!do_post_padding) {
- // insert some nops so that the verified entry point is aligned on CodeEntryAlignment
- while ((__ offset() + ic_cmp_size) % CodeEntryAlignment != 0) {
- __ nop();
- }
- }
- int offset = __ offset();
- __ inline_cache_check(receiver, IC_Klass);
- assert(__ offset() % CodeEntryAlignment == 0 || do_post_padding, "alignment must be correct");
- if (do_post_padding) {
+ int start_offset = __ offset();
+ __ inline_cache_check(receiver, ic_klass);
+
+ // if icache check fails, then jump to runtime routine
+ // Note: RECEIVER must still contain the receiver!
+ Label dont;
+ __ br(Assembler::EQ, dont);
+ __ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
+
+ // We align the verified entry point unless the method body
+ // (including its inline cache check) will fit in a single 64-byte
+ // icache line.
+ if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) {
// force alignment after the cache check.
- // It's been verified to be aligned if !VerifyOops
__ align(CodeEntryAlignment);
}
- return offset;
+
+ __ bind(dont);
+ return start_offset;
}
diff --git a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
index f28d8d6e8..547ca82c1 100644
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.cpp
@@ -404,23 +404,12 @@ void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
// explicit NULL check not needed since load from [klass_offset] causes a trap
// check against inline cache
assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check");
- int start_offset = offset();
-
- load_klass(rscratch1, receiver);
- cmp(rscratch1, iCache);
-
- // if icache check fails, then jump to runtime routine
- // Note: RECEIVER must still contain the receiver!
- Label dont;
- br(Assembler::EQ, dont);
- b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
- bind(dont);
- const int ic_cmp_size = 4 * 4;
- assert(UseCompressedClassPointers || offset() - start_offset == ic_cmp_size, "check alignment in emit_method_entry");
+
+ cmp_klass(receiver, iCache, rscratch1);
}
-void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
+void C1_MacroAssembler::build_frame(int framesize) {
// If we have to make this method not-entrant we'll overwrite its
// first instruction with a jump. For this action to be legal we
// must ensure that this first instruction is a B, BL, NOP, BKPT,
@@ -428,18 +417,15 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes) {
nop();
// Make sure there is enough stack space for this method's activation.
// Note that we do this before doing an enter().
- generate_stack_overflow_check(frame_size_in_bytes);
- enter();
- sub(sp, sp, frame_size_in_bytes); // does not emit code for frame_size == 0
+ generate_stack_overflow_check(framesize);
+ MacroAssembler::build_frame(framesize + 2 * wordSize);
if (NotifySimulator) {
notify(Assembler::method_entry);
}
}
-
-void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) {
- add(sp, sp, frame_size_in_bytes); // Does not emit code for frame_size == 0
- ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
+void C1_MacroAssembler::remove_frame(int framesize) {
+ MacroAssembler::remove_frame(framesize + 2 * wordSize);
if (NotifySimulator) {
notify(Assembler::method_reentry);
}
diff --git a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp
index e35e39f0e..f53fa6433 100644
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp
@@ -102,20 +102,6 @@ void zero_memory(Register addr, Register len, Register t1);
int rsp_offset() const { return _rsp_offset; }
void set_rsp_offset(int n) { _rsp_offset = n; }
- // Note: NEVER push values directly, but only through following push_xxx functions;
- // This helps us to track the rsp changes compared to the entry rsp (->_rsp_offset)
-
- void push_jint (jint i) { Unimplemented(); }
- void push_oop (jobject o) { Unimplemented(); }
- // Seems to always be in wordSize
- void push_addr (Address a) { Unimplemented(); }
- void push_reg (Register r) { Unimplemented(); }
- void pop_reg (Register r) { Unimplemented(); }
-
- void dec_stack (int nof_words) { Unimplemented(); }
-
- void dec_stack_after_call (int nof_words) { Unimplemented(); }
-
void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN;
#endif // CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
index 3f9c85ef0..cba988f60 100644
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@@ -1842,8 +1842,12 @@ void MacroAssembler::addw(Register Rd, Register Rn, RegisterOrConstant increment
void MacroAssembler::reinit_heapbase()
{
if (UseCompressedOops) {
- lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
- ldr(rheapbase, Address(rheapbase));
+ if (Universe::is_fully_initialized()) {
+ mov(rheapbase, Universe::narrow_ptrs_base());
+ } else {
+ lea(rheapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr()));
+ ldr(rheapbase, Address(rheapbase));
+ }
}
}
@@ -3313,3 +3317,39 @@ void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byt
return UseAcqRelForVolatileFields;
#endif
}
+
+void MacroAssembler::build_frame(int framesize) { // Emits the frame-creation sequence: saves rfp/lr and lowers sp. For non-zero sizes the total drop is framesize bytes, with the rfp/lr pair in the top two words.
+ if (framesize == 0) {
+ // Is this even possible?
+ stp(rfp, lr, Address(pre(sp, -2 * wordSize))); // pre-indexed store: sp drops by two words even though framesize is 0
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) { // small frame; presumably the bound keeps framesize - 2 * wordSize inside stp's immediate-offset range -- confirm against the ISA
+ sub(sp, sp, framesize); // allocate the whole frame in one step
+ stp(rfp, lr, Address(sp, framesize - 2 * wordSize)); // store the pair into the top two words of the new frame
+ } else {
+ stp(rfp, lr, Address(pre(sp, -2 * wordSize))); // large frame: push the pair first, then allocate the remainder below it
+ if (framesize < ((1 << 12) + 2 * wordSize)) // presumably the remainder still fits sub's immediate field -- confirm against the ISA
+ sub(sp, sp, framesize - 2 * wordSize);
+ else {
+ mov(rscratch1, framesize - 2 * wordSize); // remainder is materialized in rscratch1, so rscratch1 is overwritten on this path
+ sub(sp, sp, rscratch1);
+ }
+ }
+}
+
+void MacroAssembler::remove_frame(int framesize) { // Emits the frame-teardown sequence: reloads rfp/lr and raises sp, using the same three size cases as build_frame.
+ if (framesize == 0) {
+ ldp(rfp, lr, Address(post(sp, 2 * wordSize))); // post-indexed load: pops the pair, sp rises by two words
+ } else if (framesize < ((1 << 9) + 2 * wordSize)) { // small frame; presumably the bound keeps framesize - 2 * wordSize inside ldp's immediate-offset range -- confirm against the ISA
+ ldp(rfp, lr, Address(sp, framesize - 2 * wordSize)); // reload the pair from the top two words of the frame
+ add(sp, sp, framesize); // release the whole frame in one step
+ } else {
+ if (framesize < ((1 << 12) + 2 * wordSize)) // presumably the remainder still fits add's immediate field -- confirm against the ISA
+ add(sp, sp, framesize - 2 * wordSize); // release everything below the saved pair
+ else {
+ mov(rscratch1, framesize - 2 * wordSize); // remainder is materialized in rscratch1, so rscratch1 is overwritten on this path
+ add(sp, sp, rscratch1);
+ }
+ ldp(rfp, lr, Address(post(sp, 2 * wordSize))); // finally pop the pair, leaving sp at its pre-frame value
+ }
+}
+
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
index c3ebea34b..10ede044c 100644
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -151,6 +151,10 @@ class MacroAssembler: public Assembler {
strw(scratch, a);
}
+ // Frame creation and destruction shared between JITs.
+ void build_frame(int framesize);
+ void remove_frame(int framesize);
+
virtual void _call_Unimplemented(address call_site) {
mov(rscratch2, call_site);
haltsim();
diff --git a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
index 61d11f09f..f7f4814de 100644
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
@@ -1529,8 +1529,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
assert_different_registers(ic_reg, receiver, rscratch1);
__ verify_oop(receiver);
- __ load_klass(rscratch1, receiver);
- __ cmp(ic_reg, rscratch1);
+ __ cmp_klass(receiver, ic_reg, rscratch1);
__ br(Assembler::EQ, hit);
__ b(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));