diff options
author | aph <none@none> | 2014-06-23 18:56:33 +0100 |
---|---|---|
committer | aph <none@none> | 2014-06-23 18:56:33 +0100 |
commit | 42b911a8cd82864eefc141b2163ad81feaf2dd76 (patch) | |
tree | d3eb6b32ad2df03544984a0cb18c5cd1c1dfa2c5 | |
parent | 38726e7bbe56a6c3f34006d0e716aeb356e46d65 (diff) |
AArch64 type profiling support
-rw-r--r-- | src/cpu/aarch64/vm/globals_aarch64.hpp | 2 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/interp_masm_aarch64.cpp | 205 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/interp_masm_aarch64.hpp | 5 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/macroAssembler_aarch64.hpp | 9 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/templateTable_aarch64.cpp | 8 |
5 files changed, 228 insertions, 1 deletions
diff --git a/src/cpu/aarch64/vm/globals_aarch64.hpp b/src/cpu/aarch64/vm/globals_aarch64.hpp index 9c2933452..23b26c7cd 100644 --- a/src/cpu/aarch64/vm/globals_aarch64.hpp +++ b/src/cpu/aarch64/vm/globals_aarch64.hpp @@ -72,7 +72,7 @@ define_pd_global(bool, UseMembar, true); // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread -define_pd_global(uintx, TypeProfileLevel, 0); +define_pd_global(uintx, TypeProfileLevel, 111); #if defined(COMPILER1) || defined(COMPILER2) define_pd_global(intx, InlineSmallCode, 1000); diff --git a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp index 4e565f365..69954a916 100644 --- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp @@ -1479,3 +1479,208 @@ void InterpreterMacroAssembler::call_VM_base(Register oop_result, restore_locals(); } +void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { + Label update, next, none; + + verify_oop(obj); + + cbnz(obj, update); + orptr(mdo_addr, TypeEntries::null_seen); + b(next); + + bind(update); + load_klass(obj, obj); + + ldr(rscratch1, mdo_addr); + eor(obj, obj, rscratch1); + tst(obj, TypeEntries::type_klass_mask); + br(Assembler::EQ, next); // klass seen before, nothing to + // do. The unknown bit may have been + // set already but no need to check. + + tst(obj, TypeEntries::type_unknown); + br(Assembler::NE, next); // already unknown. Nothing to do anymore. + + ldr(rscratch1, mdo_addr); + cbz(rscratch1, none); + cmp(rscratch1, TypeEntries::null_seen); + br(Assembler::EQ, none); + // There is a chance that the checks above (re-reading profiling + // data from memory) fail if another thread has just set the + // profiling to this obj's klass + ldr(rscratch1, mdo_addr); + eor(obj, obj, rscratch1); + tst(obj, TypeEntries::type_klass_mask); + br(Assembler::EQ, next); + + // different than before. 
Cannot keep accurate profile. + orptr(mdo_addr, TypeEntries::type_unknown); + b(next); + + bind(none); + // first time here. Set profile type. + str(obj, mdo_addr); + + bind(next); +} + +void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { + if (!ProfileInterpreter) { + return; + } + + if (MethodData::profile_arguments() || MethodData::profile_return()) { + Label profile_continue; + + test_method_data_pointer(mdp, profile_continue); + + int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); + + ldrb(rscratch1, Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start)); + cmp(rscratch1, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); + br(Assembler::NE, profile_continue); + + if (MethodData::profile_arguments()) { + Label done; + int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); + add(mdp, mdp, off_to_args); + + for (int i = 0; i < TypeProfileArgsLimit; i++) { + if (i > 0 || MethodData::profile_return()) { + // If return value type is profiled we may have no argument to profile + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count()); + cmp(tmp, TypeStackSlotEntries::per_arg_count()); + br(Assembler::LT, done); + } + ldr(tmp, Address(callee, Method::const_offset())); + load_unsigned_short(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); + // stack offset o (zero based) from the start of the argument + // list, for n arguments translates into offset n - o - 1 from + // the end of the argument list + ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args)); + sub(tmp, tmp, rscratch1); + sub(tmp, tmp, 1); + Address arg_addr = argument_address(tmp); + ldr(tmp, arg_addr); + + Address mdo_arg_addr(mdp, 
in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); + profile_obj_type(tmp, mdo_arg_addr); + + int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); + add(mdp, mdp, to_add); + off_to_args += to_add; + } + + if (MethodData::profile_return()) { + ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); + sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count()); + } + + bind(done); + + if (MethodData::profile_return()) { + // We're right after the type profile for the last + // argument. tmp is the number of cells left in the + // CallTypeData/VirtualCallTypeData to reach its end. Non null + // if there's a return to profile. + assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); + add(mdp, mdp, tmp, LSL, exact_log2(DataLayout::cell_size)); + } + str(mdp, Address(rfp, frame::interpreter_frame_mdx_offset * wordSize)); + } else { + assert(MethodData::profile_return(), "either profile call args or call ret"); + update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); + } + + // mdp points right after the end of the + // CallTypeData/VirtualCallTypeData, right after the cells for the + // return value type if there's one + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { + assert_different_registers(mdp, ret, tmp, rbcp); + if (ProfileInterpreter && MethodData::profile_return()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + if (MethodData::profile_return_jsr292_only()) { + // If we don't profile all invoke bytecodes we must make sure + // it's a bytecode we indeed profile. 
We can't go back to the + // beginning of the ProfileData we intend to update to check its + // type because we're right after it and we don't know its + // length + Label do_profile; + ldrb(rscratch1, Address(rbcp, 0)); + cmp(rscratch1, Bytecodes::_invokedynamic); + br(Assembler::EQ, do_profile); + cmp(rscratch1, Bytecodes::_invokehandle); + br(Assembler::EQ, do_profile); + get_method(tmp); + ldrb(rscratch1, Address(tmp, Method::intrinsic_id_offset_in_bytes())); + cmp(rscratch1, vmIntrinsics::_compiledLambdaForm); + br(Assembler::NE, profile_continue); + + bind(do_profile); + } + + Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); + mov(tmp, ret); + profile_obj_type(tmp, mdo_ret_addr); + + bind(profile_continue); + } +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; + + test_method_data_pointer(mdp, profile_continue); + + // Load the offset of the area within the MDO used for + // parameters. If it's negative we're not profiling any parameters + ldr(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); + cmp(tmp1, 0u); + br(Assembler::LT, profile_continue); + + // Compute a pointer to the area for parameters from the offset + // and move the pointer to the slot for the last + // parameter. Collect profiling from last parameter down.
+ // mdo start + parameters offset + array length - 1 + add(mdp, mdp, tmp1); + ldr(tmp1, Address(mdp, ArrayData::array_len_offset())); + sub(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + + Label loop; + bind(loop); + + int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); + int type_base = in_bytes(ParametersTypeData::type_offset(0)); + int per_arg_scale = exact_log2(DataLayout::cell_size); + add(rscratch1, mdp, off_base); + add(rscratch2, mdp, type_base); + + Address arg_off(rscratch1, tmp1, Address::lsl(per_arg_scale)); + Address arg_type(rscratch2, tmp1, Address::lsl(per_arg_scale)); + + // load offset on the stack from the slot for this parameter + ldr(tmp2, arg_off); + neg(tmp2, tmp2); + // read the parameter from the local area + ldr(tmp2, Address(rlocals, tmp2, Address::lsl(Interpreter::logStackElementSize))); + + // profile the parameter + profile_obj_type(tmp2, arg_type); + + // go to next parameter + subs(tmp1, tmp1, TypeStackSlotEntries::per_arg_count()); + br(Assembler::GE, loop); + + bind(profile_continue); + } +} diff --git a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp index ce47cafab..7b9a21589 100644 --- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp +++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp @@ -263,6 +263,11 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_switch_case(Register index_in_scratch, Register mdp, Register scratch2); + void profile_obj_type(Register obj, const Address& mdo_addr); + void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); + void profile_return_type(Register mdp, Register ret, Register tmp); + void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); + // Debugging // only if +VerifyOops && state == atos void verify_oop(Register reg, TosState state = atos); diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index 
10ede044c..fe583c7ae 100644 --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -1054,6 +1054,15 @@ public: void xorptr(Register dst, Address src) { Unimplemented(); } #endif + void orptr(Address adr, RegisterOrConstant src) { + ldr(rscratch2, adr); + if (src.is_register()) + orr(rscratch2, rscratch2, src.as_register()); + else + orr(rscratch2, rscratch2, src.as_constant()); + str(rscratch2, adr); + } + // Calls // void call(Label& L, relocInfo::relocType rtype); diff --git a/src/cpu/aarch64/vm/templateTable_aarch64.cpp b/src/cpu/aarch64/vm/templateTable_aarch64.cpp index d2e717e0f..944f5de2e 100644 --- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp +++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp @@ -3004,6 +3004,7 @@ void TemplateTable::invokevirtual_helper(Register index, // profile this call __ profile_final_call(r0); + __ profile_arguments_type(r0, method, r4, true); __ jump_from_interpreted(method, r0); @@ -3018,6 +3019,7 @@ void TemplateTable::invokevirtual_helper(Register index, // get target methodOop & entry point __ lookup_virtual_method(r0, index, method); + __ profile_arguments_type(r3, method, r4, true); // FIXME -- this looks completely redundant. is it? // __ ldr(r3, Address(method, Method::interpreter_entry_offset())); __ jump_from_interpreted(method, r3); @@ -3048,6 +3050,7 @@ void TemplateTable::invokespecial(int byte_no) __ null_check(r2); // do the call __ profile_call(r0); + __ profile_arguments_type(r0, rmethod, rbcp, false); __ jump_from_interpreted(rmethod, r0); } @@ -3059,6 +3062,7 @@ void TemplateTable::invokestatic(int byte_no) prepare_invoke(byte_no, rmethod); // get f1 Method* // do the call __ profile_call(r0); + __ profile_arguments_type(r0, rmethod, r4, false); __ jump_from_interpreted(rmethod, r0); } @@ -3113,6 +3117,8 @@ void TemplateTable::invokeinterface(int byte_no) { // method. 
__ cbz(rmethod, no_such_method); + __ profile_arguments_type(r3, rmethod, r13, true); + // do the call // r2: receiver // rmethod,: methodOop @@ -3162,6 +3168,7 @@ void TemplateTable::invokehandle(int byte_no) { // r13 is safe to use here as a scratch reg because it is about to // be clobbered by jump_from_interpreted(). __ profile_final_call(r13); + __ profile_arguments_type(r13, rmethod, r4, true); __ jump_from_interpreted(rmethod, r0); } @@ -3191,6 +3198,7 @@ void TemplateTable::invokedynamic(int byte_no) { // %%% should make a type profile for any invokedynamic that takes a ref argument // profile this call __ profile_call(rbcp); + __ profile_arguments_type(r3, rmethod, r13, false); __ verify_oop(r0); |