commit     ca2c3de300a07ea1a764b6517d653c24ec83aef1
tree       a44f43d45910cd1bdd5e6b9b7531d07518799640
parent     3223329ff622c5020b392d1675902c1f8292df40
parent     4b03e6ff564b65b86ae6d9a3fba85cfa7b891642
author     aph <none@none>  2014-06-19 16:02:36 +0100
committer  aph <none@none>  2014-06-19 16:02:36 +0100

    Merge

 20 files changed, 259 insertions(+), 665 deletions(-)
diff --git a/src/cpu/aarch64/vm/aarch64.ad b/src/cpu/aarch64/vm/aarch64.ad
index 45a5b17bd..9735565a0 100644
--- a/src/cpu/aarch64/vm/aarch64.ad
+++ b/src/cpu/aarch64/vm/aarch64.ad
@@ -839,19 +839,7 @@ int MachCallStaticJavaNode::ret_addr_offset()
 int MachCallDynamicJavaNode::ret_addr_offset()
 {
-  // call should be
-  //   ldr_constant
-  //   bl
-  // where ldr_constant is either
-  //   ldr // if NearCpool
-  // or
-  //   adrp // if !NearCPool
-  //   ldr
-  int off = 8;
-  if (!NearCpool) {
-    off += 4;
-  }
-  return off;
+  return 16; // movz, movk, movk, bl
 }
 
 int MachCallRuntimeNode::ret_addr_offset() {
@@ -864,7 +852,8 @@ int MachCallRuntimeNode::ret_addr_offset() {
   if (cb) {
     return 4;
   } else {
-    return 20;
+    // A 48-bit address.  See movptr().
+    return 16;
   }
 }
@@ -961,7 +950,7 @@ void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
   if (framesize == 0) {
     // Is this even possible?
     st->print("stp lr, rfp, [sp, #%d]!", -(2 * wordSize));
-  } else if (framesize < (1 << 12)) {
+  } else if (framesize < (1 << 7)) {
     st->print("sub sp, sp, #%d\n\t", framesize);
     st->print("stp rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
   } else {
@@ -1043,7 +1032,7 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
   if (framesize == 0) {
     st->print("ldp lr, rfp, [sp],#%d\n\t", (2 * wordSize));
-  } else if (framesize < (1 << 12)) {
+  } else if (framesize < (1 << 7)) {
     st->print("ldp lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
     st->print("add sp, sp, #%d\n\t", framesize);
   } else {
@@ -2107,7 +2096,6 @@ encode %{
   //   movz xscratch1 0xnnnn        <-- current pc is here
   //   movk xscratch1 0xnnnn
   //   movk xscratch1 0xnnnn
-  //   movk xscratch1 0xnnnn
   //   str xscratch1, [xthread,#anchor_pc_off]
   //   mov xscratch2, sp
   //   str xscratch2, [xthread,#anchor_sp_off
@@ -2119,7 +2107,6 @@ encode %{
   //   movz xscratch1 0xnnnn
   //   movk xscratch1 0xnnnn
   //   movk xscratch1 0xnnnn
-  //   movk xscratch1 0xnnnn
   //   blrt xscratch1
   //   . . .
   //
@@ -2129,18 +2116,18 @@ encode %{
   // stub.  we assert that nargs is < 7.
   //
   // so the offset we need to add to the pc (in 32-bit words) is
-  //   4 +        <-- load 64 bit constant return pc
+  //   3 +        <-- load 48-bit constant return pc
   //   1 +        <-- write anchor pc
   //   1 +        <-- copy sp
   //   1 +        <-- write anchor sp
   //   nargs +    <-- java stub arg count
   //   1 +        <-- extra thread arg
   //  [ 1 + ]     <-- optional ret address of stub caller
-  //   4 +        <-- load 64 bit call target address
+  //   3 +        <-- load 64 bit call target address
   //   1          <-- blrt instruction
   //
-  // i.e we need to add (nargs + 13) * 4 bytes or (nargs + 14) * 4 bytes
-  //
+  // i.e we need to add (nargs + 11) * 4 bytes or (nargs + 12) * 4 bytes
+  //
 
   enc_class aarch64_enc_save_pc() %{
     Compile* C = ra_->C;
@@ -2149,10 +2136,10 @@ encode %{
     assert(nargs <= 8, "opto runtime stub has more than 8 args!");
     MacroAssembler _masm(&cbuf);
     address pc = __ pc();
-    int call_offset = (nargs + 13) * 4;
+    int call_offset = (nargs + 11) * 4;
     int field_offset = in_bytes(JavaThread::frame_anchor_offset())
       + in_bytes(JavaFrameAnchor::last_Java_pc_offset());
-    __ mov(rscratch1, InternalAddress(pc + call_offset));
+    __ lea(rscratch1, InternalAddress(pc + call_offset));
     __ str(rscratch1, Address(rthread, field_offset));
   %}
@@ -2579,7 +2566,7 @@ encode %{
     } else {
       relocInfo::relocType rtype = $src->constant_reloc();
       if (rtype == relocInfo::oop_type) {
-        __ movoop(dst_reg, (jobject)con);
+        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
       } else if (rtype == relocInfo::metadata_type) {
         __ mov_metadata(dst_reg, (Metadata*)con);
       } else {
@@ -2653,7 +2640,7 @@ encode %{
     } else {
       relocInfo::relocType rtype = $src->constant_reloc();
       assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
-      __ set_narrow_klass(dst_reg, (Klass *)con);
+      __ set_narrow_klass(dst_reg, (Klass *)con);
     }
   %}
@@ -2845,8 +2832,6 @@ encode %{
     address mark = __ pc();
     address addr = (address)$meth$$method;
     if (!_method) {
-      // TODO check this
-      // think we are calling generated Java here not x86
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
       __ bl(Address(addr, relocInfo::runtime_call_type));
     } else if (_optimized_virtual) {
@@ -2921,7 +2906,7 @@ encode %{
     int fpcnt;
     int rtype;
     getCallInfo(tf(), gpcnt, fpcnt, rtype);
-    __ mov(rscratch1, RuntimeAddress(entry));
+    __ lea(rscratch1, RuntimeAddress(entry));
     __ blrt(rscratch1, gpcnt, fpcnt, rtype);
   }
 %}
diff --git a/src/cpu/aarch64/vm/assembler_aarch64.cpp b/src/cpu/aarch64/vm/assembler_aarch64.cpp
index b8c7e5c03..d8c32756e 100644
--- a/src/cpu/aarch64/vm/assembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp
@@ -1273,7 +1273,7 @@ void Address::lea(MacroAssembler *as, Register r) const {
       if (rtype == relocInfo::none)
         __ mov(r, target());
       else
-        __ mov64(r, (uint64_t)target());
+        __ movptr(r, (uint64_t)target());
       break;
     }
   default:
diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp
index c694cf901..d40042994 100644
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -1243,7 +1243,7 @@ public:
     f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
     long offset = (adr.target() - pc()) >> 2;
     sf(offset, 23, 5);
-#ifdef ASSERT
+#if 0
     Relocation* reloc = adr.rspec().reloc();
     relocInfo::relocType rtype = (relocInfo::relocType) reloc->type();
     assert(rtype == relocInfo::internal_word_type,
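The deleted comment's arithmetic is worth a sanity check: a 48-bit constant now takes three instructions (movz, movk, movk) where the old 64-bit form took four, which is also why MachCallDynamicJavaNode::ret_addr_offset() becomes 16 — four 4-byte instructions: movz, movk, movk, bl. Below is a minimal standalone sketch of the same bookkeeping; call_offset_bytes is a hypothetical helper mirroring the instruction counts listed in the aarch64.ad comment, not HotSpot code.

    #include <assert.h>

    // Instruction budget for the aarch64_enc_save_pc sequence, per the
    // comment in aarch64.ad: each entry is one 4-byte AArch64 instruction.
    static int call_offset_bytes(int nargs, bool stub_saves_ret_addr) {
      int insns = 3                             // movz/movk/movk: 48-bit return pc
                + 1                             // str: write anchor pc
                + 1                             // mov: copy sp
                + 1                             // str: write anchor sp
                + nargs                         // java stub arg count
                + 1                             // extra thread arg
                + (stub_saves_ret_addr ? 1 : 0) // optional ret address of stub caller
                + 3                             // movz/movk/movk: call target
                + 1;                            // blrt
      return insns * 4;
    }

    int main() {
      // Matches the (nargs + 11) * 4 constant the encoder now uses, and
      // (nargs + 12) * 4 when the optional slot is present.
      assert(call_offset_bytes(5, false) == (5 + 11) * 4);
      assert(call_offset_bytes(5, true)  == (5 + 12) * 4);
      // ret_addr_offset for a dynamic Java call: movz, movk, movk, bl.
      assert((3 + 1) * 4 == 16);
      return 0;
    }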
diff --git a/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
index cdedc64ec..2b6b0f852 100644
--- a/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_CodeStubs_aarch64.cpp
@@ -320,133 +320,7 @@ void PatchingStub::align_patch_site(MacroAssembler* masm) {
 }
 
 void PatchingStub::emit_code(LIR_Assembler* ce) {
-  assert(NativeCall::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF, "not enough room for call");
-
-  Label call_patch;
-
-  // static field accesses have special semantics while the class
-  // initializer is being run so we emit a test which can be used to
-  // check that this code is being executed by the initializing
-  // thread.
-  address being_initialized_entry = __ pc();
-  if (CommentedAssembly) {
-    __ block_comment(" patch template");
-  }
-
-  // make a copy the code which is going to be patched.
-  for (int i = 0; i < _bytes_to_copy; i++) {
-    address ptr = (address)(_pc_start + i);
-    int a_byte = (*ptr) & 0xFF;
-    __ emit_int8(a_byte);
-  }
-
-  address end_of_patch = __ pc();
-  int bytes_to_skip = 0;
-  if (_id == load_mirror_id) {
-    int offset = __ offset();
-    if (CommentedAssembly) {
-      __ block_comment(" being_initialized check");
-    }
-    assert(_obj != noreg, "must be a valid register");
-    Register tmp = r0;
-    Register tmp2 = r19;
-    __ stp(tmp, tmp2, Address(__ pre(sp, -2 * wordSize)));
-    // Load without verification to keep code size small. We need it because
-    // begin_initialized_entry_offset has to fit in a byte. Also, we know it's not null.
-    __ ldr(tmp2, Address(_obj, java_lang_Class::klass_offset_in_bytes()));
-    __ ldr(tmp, Address(tmp2, InstanceKlass::init_thread_offset()));
-    __ cmp(rthread, tmp);
-    __ ldp(tmp, tmp2, Address(__ post(sp, 2 * wordSize)));
-    __ br(Assembler::NE, call_patch);
-
-    // access_field patches may execute the patched code before it's
-    // copied back into place so we need to jump back into the main
-    // code of the nmethod to continue execution.
-    __ b(_patch_site_continuation);
-
-    // make sure this extra code gets skipped
-    bytes_to_skip += __ offset() - offset;
-  }
-  if (CommentedAssembly) {
-    __ block_comment("patch data");
-  }
-  // Now emit the patch record telling the runtime how to find the
-  // pieces of the patch.
-  int sizeof_patch_record = 8;
-  bytes_to_skip += sizeof_patch_record;
-
-  // emit the offsets needed to find the code to patch
-  int being_initialized_entry_offset = __ pc() - being_initialized_entry + sizeof_patch_record;
-
-  // If this is a field access, the offset is held in the constant
-  // pool rather than embedded in the instruction, so we don't copy
-  // any instructions: we set the value in the constant pool and
-  // overwrite the NativeGeneralJump.
-  {
-    Label L;
-    __ br(Assembler::AL, L);
-    __ emit_int8(0);
-    __ emit_int8(being_initialized_entry_offset);
-    if (_id == access_field_id) {
-      __ emit_int8(bytes_to_skip + _bytes_to_copy);
-      __ emit_int8(0);
-    } else {
-      __ emit_int8(bytes_to_skip);
-      __ emit_int8(_bytes_to_copy);
-    }
-    __ bind(L);
-  }
-
-  address patch_info_pc = __ pc();
-  assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
-
-  address entry = __ pc();
-  NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
-  address target = NULL;
-  relocInfo::relocType reloc_type = relocInfo::none;
-
-  switch (_id) {
-  case access_field_id:
-    target = Runtime1::entry_for(Runtime1::access_field_patching_id);
-    reloc_type = relocInfo::section_word_type;
-    break;
-  case load_klass_id:
-    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
-    reloc_type = relocInfo::metadata_type;
-    break;
-  case load_mirror_id:
-    target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
-    reloc_type = relocInfo::oop_type;
-    break;
-  case load_appendix_id:
-    target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
-    reloc_type = relocInfo::oop_type;
-    break;
-  default: ShouldNotReachHere();
-  }
-
-  __ bind(call_patch);
-
-  if (CommentedAssembly) {
-    __ block_comment("patch entry point");
-  }
-  __ bl(RuntimeAddress(target));
-  assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
-  ce->add_call_info_here(_info);
-  int jmp_off = __ offset();
-  __ b(_patch_site_entry);
-  // Add enough nops so deoptimization can overwrite the jmp above with a call
-  // and not destroy the world.
-  // FIXME: AArch64 doesn't really need this
-  // __ nop(); __ nop();
-  // if (_id == load_klass_id
-  //     || _id == load_mirror_id
-  //     || _id == access_field_id
-  //     ) {
-  //   CodeSection* cs = __ code_section();
-  //   RelocIterator iter(cs, (address)_pc_start, (address)(_pc_start + 1));
-  //   relocInfo::change_reloc_info_for_address(&iter, (address) _pc_start, reloc_type, relocInfo::none);
-  // }
+  assert(false, "AArch64 should not use C1 runtime patching");
 }
diff --git a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
index 671ac4cf6..1f3433460 100644
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
@@ -26,6 +26,7 @@
 
 #include "precompiled.hpp"
 #include "asm/assembler.hpp"
+#include "c1/c1_CodeStubs.hpp"
 #include "c1/c1_Compilation.hpp"
 #include "c1/c1_LIRAssembler.hpp"
 #include "c1/c1_MacroAssembler.hpp"
@@ -200,8 +201,7 @@ Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) {
   if (Address::offset_ok_for_immed(addr_offset, addr->scale()))
     return Address(base, addr_offset, Address::lsl(addr->scale()));
   else {
-    address const_addr = int_constant(addr_offset);
-    __ ldr_constant(tmp, const_addr);
+    __ mov(tmp, addr_offset);
     return Address(base, tmp, Address::lsl(addr->scale()));
   }
 }
@@ -314,34 +314,40 @@ void LIR_Assembler::jobject2reg(jobject o, Register reg) {
   if (o == NULL) {
     __ mov(reg, zr);
   } else {
-    int oop_index = __ oop_recorder()->find_index(o);
-    assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(o)), "should be real oop");
-    RelocationHolder rspec = oop_Relocation::spec(oop_index);
-    address const_ptr = int_constant(jlong(o));
-    __ code()->consts()->relocate(const_ptr, rspec);
-    __ ldr_constant(reg, const_ptr);
-
-    if (PrintRelocations && Verbose) {
-      puts("jobject2reg:\n");
-      printf("oop %p at %p\n", o, const_ptr);
-      fflush(stdout);
-      das((uint64_t)__ pc(), -2);
-    }
+    __ movoop(reg, o, /*immediate*/true);
   }
 }
 
+void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) {
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
 
-void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
-  // Allocate a new index in table to hold the object once it's been patched
-  int oop_index = __ oop_recorder()->allocate_oop_index(NULL);
-//  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_mirror_id, oop_index);
-  PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index);
+  switch (patching_id(info)) {
+  case PatchingStub::access_field_id:
+    target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+    reloc_type = relocInfo::section_word_type;
+    break;
+  case PatchingStub::load_klass_id:
+    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+    reloc_type = relocInfo::metadata_type;
+    break;
+  case PatchingStub::load_mirror_id:
+    target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  case PatchingStub::load_appendix_id:
+    target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  default: ShouldNotReachHere();
+  }
+
+  __ bl(RuntimeAddress(target));
+  add_call_info_here(info);
+}
 
-  RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  address const_ptr = int_constant(-1);
-  __ code()->consts()->relocate(const_ptr, rspec);
-  __ ldr_constant(reg, const_ptr);
-
-  patching_epilog(patch, lir_patch_normal, reg, info);
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  deoptimize_trap(info);
 }
@@ -808,23 +814,21 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch
   PatchingStub* patch = NULL;
   Register compressed_src = rscratch1;
 
+  if (patch_code != lir_patch_none) {
+    deoptimize_trap(info);
+    return;
+  }
+
   if (type == T_ARRAY || type == T_OBJECT) {
     __ verify_oop(src->as_register());
 
     if (UseCompressedOops && !wide) {
       __ encode_heap_oop(compressed_src, src->as_register());
-      if (patch_code != lir_patch_none) {
-        info->oop_map()->set_narrowoop(compressed_src->as_VMReg());
-      }
     } else {
       compressed_src = src->as_register();
     }
   }
 
-  if (patch_code != lir_patch_none) {
-    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
-  }
-
   int null_check_here = code_offset();
   switch (type) {
   case T_FLOAT: {
@@ -882,10 +886,6 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch
   if (info != NULL) {
     add_debug_info_for_null_check(null_check_here, info);
   }
-
-  if (patch_code != lir_patch_none) {
-    patching_epilog(patch, patch_code, to_addr->base()->as_register(), info);
-  }
 }
@@ -922,10 +922,31 @@ void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
 
 void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) {
-  Metadata* o = NULL;
-  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id);
-  __ mov_metadata(reg, o);
-  patching_epilog(patch, lir_patch_normal, reg, info);
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+
+  switch (patching_id(info)) {
+  case PatchingStub::access_field_id:
+    target = Runtime1::entry_for(Runtime1::access_field_patching_id);
+    reloc_type = relocInfo::section_word_type;
+    break;
+  case PatchingStub::load_klass_id:
+    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+    reloc_type = relocInfo::metadata_type;
+    break;
+  case PatchingStub::load_mirror_id:
+    target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  case PatchingStub::load_appendix_id:
+    target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+    reloc_type = relocInfo::oop_type;
+    break;
+  default: ShouldNotReachHere();
+  }
+
+  __ bl(RuntimeAddress(target));
+  add_call_info_here(info);
 }
 
 void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
@@ -948,10 +969,9 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch
     __ verify_oop(addr->base()->as_pointer_register());
   }
 
-  PatchingStub* patch = NULL;
-
   if (patch_code != lir_patch_none) {
-    patch = new PatchingStub(_masm, PatchingStub::access_field_id);
+    deoptimize_trap(info);
+    return;
   }
 
   if (info != NULL) {
@@ -1023,10 +1043,6 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch
     ShouldNotReachHere();
   }
 
-  if (patch != NULL) {
-    patching_epilog(patch, patch_code, addr->base()->as_register(), info);
-  }
-
   if (type == T_ARRAY || type == T_OBJECT) {
 #ifdef _LP64
     if (UseCompressedOops && !wide) {
diff --git a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp
index bf2d70320..5a3f551ed 100644
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.hpp
@@ -64,6 +64,8 @@
 
   void init() { tableswitch_count = 0; }
 
+  void deoptimize_trap(CodeEmitInfo *info);
+
 public:
 
   void store_parameter(Register r, int offset_from_esp_in_words);
diff --git a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
index 9d89d2313..a5e3ae810 100644
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp
@@ -63,7 +63,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
   set_last_Java_frame(sp, rfp, retaddr, rscratch1);
 
   // do the call
-  mov(rscratch1, RuntimeAddress(entry));
+  lea(rscratch1, RuntimeAddress(entry));
   blrt(rscratch1, args_size + 1, 8, 1);
   bind(retaddr);
   int call_offset = offset();
@@ -553,7 +553,7 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) {
   Label retaddr;
   __ set_last_Java_frame(sp, rfp, retaddr, rscratch1);
   // do the call
-  __ mov(rscratch1, RuntimeAddress(target));
+  __ lea(rscratch1, RuntimeAddress(target));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
   OopMapSet* oop_maps = new OopMapSet();
@@ -1321,19 +1321,6 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
 
 #undef __
 
-static Klass* resolve_field_return_klass(methodHandle caller, int bci, TRAPS) {
-  Bytecode_field field_access(caller, bci);
-  // This can be static or non-static field access
-  Bytecodes::Code code = field_access.code();
-
-  // We must load class, initialize class and resolvethe field
-  fieldDescriptor result; // initialize class if needed
-  constantPoolHandle constants(THREAD, caller->constants());
-  LinkResolver::resolve_field_access(result, constants, field_access.index(), Bytecodes::java_code(code), CHECK_NULL);
-  return result.field_holder();
-}
-
-
 // Simple helper to see if the caller of a runtime stub which
 // entered the VM has been deoptimized
@@ -1347,261 +1334,40 @@ static bool caller_is_deopted() {
 }
 
 JRT_ENTRY(void, Runtime1::patch_code_aarch64(JavaThread* thread, Runtime1::StubID stub_id ))
-  NOT_PRODUCT(_patch_code_slowcase_cnt++;)
-
-  ResourceMark rm(thread);
+{
   RegisterMap reg_map(thread, false);
-  frame runtime_frame = thread->last_frame();
-  frame caller_frame = runtime_frame.sender(&reg_map);
 
-  if (DeoptimizeWhenPatching) {
-    // According to the ARMv8 ARM, "Concurrent modification and
-    // execution of instructions can lead to the resulting instruction
-    // performing any behavior that can be achieved by executing any
-    // sequence of instructions that can be executed from the same
-    // Exception level, except where the instruction before
-    // modification and the instruction after modification is a B, BL,
-    // NOP, BKPT, SVC, HVC, or SMC instruction."
-    //
-    // This effectively makes the games we play when patching
-    // impossible, so when we come across an access that needs
-    // patching we must deoptimize.
-
-    if (TracePatching) {
-      tty->print_cr("Deoptimizing because patch is needed");
-    }
-    // It's possible the nmethod was invalidated in the last
-    // safepoint, but if it's still alive then make it not_entrant.
-    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
-    if (nm != NULL) {
-      nm->make_not_entrant();
-    }
-
-    Deoptimization::deoptimize_frame(thread, caller_frame.id());
-
-    // Return to the now deoptimized frame.
-    return;
-  }
-
-  // last java frame on stack
-  vframeStream vfst(thread, true);
-  assert(!vfst.at_end(), "Java frame must exist");
-
-  methodHandle caller_method(THREAD, vfst.method());
-  // Note that caller_method->code() may not be same as caller_code because of OSR's
-  // Note also that in the presence of inlining it is not guaranteed
-  // that caller_method() == caller_code->method()
-
-  int bci = vfst.bci();
-  Bytecodes::Code code = caller_method()->java_code_at(bci);
-
-  bool deoptimize_for_volatile = false;
-  int patch_field_offset = -1;
-  KlassHandle init_klass(THREAD, NULL); // klass needed by load_klass_patching code
-  KlassHandle load_klass(THREAD, NULL); // klass needed by load_klass_patching code
-  Handle mirror(THREAD, NULL);          // oop needed by load_mirror_patching code
-  fieldDescriptor result; // initialize class if needed
-
-  bool load_klass_or_mirror_patch_id =
-    (stub_id == Runtime1::load_klass_patching_id || stub_id == Runtime1::load_mirror_patching_id);
-
-  if (stub_id == Runtime1::access_field_patching_id) {
-
-    Bytecode_field field_access(caller_method, bci);
-    fieldDescriptor result; // initialize class if needed
-    Bytecodes::Code code = field_access.code();
-    constantPoolHandle constants(THREAD, caller_method->constants());
-    LinkResolver::resolve_field_access(result, constants, field_access.index(), Bytecodes::java_code(code), CHECK);
-    patch_field_offset = result.offset();
-
-    // If we're patching a field which is volatile then at compile it
-    // must not have been known to be volatile, so the generated code
-    // isn't correct for a volatile reference.  The nmethod has to be
-    // deoptimized so that the code can be regenerated correctly.
-    // This check is only needed for access_field_patching since this
-    // is the path for patching field offsets.  load_klass is only
-    // used for patching references to oops which don't need special
-    // handling in the volatile case.
-    deoptimize_for_volatile = result.access_flags().is_volatile();
-  } else if (load_klass_or_mirror_patch_id) {
-    Klass* k = NULL;
-    switch (code) {
-      case Bytecodes::_putstatic:
-      case Bytecodes::_getstatic:
-        { Klass* klass = resolve_field_return_klass(caller_method, bci, CHECK);
-          init_klass = KlassHandle(THREAD, klass);
-          mirror = Handle(THREAD, klass->java_mirror());
-        }
-        break;
-      case Bytecodes::_new:
-        { Bytecode_new bnew(caller_method(), caller_method->bcp_from(bci));
-          k = caller_method->constants()->klass_at(bnew.index(), CHECK);
-        }
-        break;
-      case Bytecodes::_multianewarray:
-        { Bytecode_multianewarray mna(caller_method(), caller_method->bcp_from(bci));
-          k = caller_method->constants()->klass_at(mna.index(), CHECK);
-        }
-        break;
-      case Bytecodes::_instanceof:
-        { Bytecode_instanceof io(caller_method(), caller_method->bcp_from(bci));
-          k = caller_method->constants()->klass_at(io.index(), CHECK);
-        }
-        break;
-      case Bytecodes::_checkcast:
-        { Bytecode_checkcast cc(caller_method(), caller_method->bcp_from(bci));
-          k = caller_method->constants()->klass_at(cc.index(), CHECK);
-        }
-        break;
-      case Bytecodes::_anewarray:
-        { Bytecode_anewarray anew(caller_method(), caller_method->bcp_from(bci));
-          Klass* ek = caller_method->constants()->klass_at(anew.index(), CHECK);
-          k = ek->array_klass(CHECK);
-        }
-        break;
-      case Bytecodes::_ldc:
-      case Bytecodes::_ldc_w:
-        {
-          Bytecode_loadconstant cc(caller_method, bci);
-          oop m = cc.resolve_constant(CHECK);
-          mirror = Handle(THREAD, m);
-        }
-        break;
-      default: Unimplemented();
-    }
-    // convert to handle
-    load_klass = KlassHandle(THREAD, k);
-  } else {
-    ShouldNotReachHere();
+  NOT_PRODUCT(_patch_code_slowcase_cnt++;)
+
+  // According to the ARMv8 ARM, "Concurrent modification and
+  // execution of instructions can lead to the resulting instruction
+  // performing any behavior that can be achieved by executing any
+  // sequence of instructions that can be executed from the same
+  // Exception level, except where the instruction before
+  // modification and the instruction after modification is a B, BL,
+  // NOP, BKPT, SVC, HVC, or SMC instruction."
+  //
+  // This effectively makes the games we play when patching
+  // impossible, so when we come across an access that needs
+  // patching we must deoptimize.
+
+  if (TracePatching) {
+    tty->print_cr("Deoptimizing because patch is needed");
   }
 
-  if (deoptimize_for_volatile) {
-    // At compile time we assumed the field wasn't volatile but after
-    // loading it turns out it was volatile so we have to throw the
-    // compiled code out and let it be regenerated.
-    if (TracePatching) {
-      tty->print_cr("Deoptimizing for patching volatile field reference");
-    }
-    // It's possible the nmethod was invalidated in the last
-    // safepoint, but if it's still alive then make it not_entrant.
-    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
-    if (nm != NULL) {
-      nm->make_not_entrant();
-    }
-
-    Deoptimization::deoptimize_frame(thread, caller_frame.id());
-
-    // Return to the now deoptimized frame.
-  }
+  frame runtime_frame = thread->last_frame();
+  frame caller_frame = runtime_frame.sender(&reg_map);
 
-  // If we are patching in a non-perm oop, make sure the nmethod
-  // is on the right list.
-  if (ScavengeRootsInCode && mirror.not_null() && mirror()->is_scavengable()) {
-    MutexLockerEx ml_code (CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
-    guarantee(nm != NULL, "only nmethods can contain non-perm oops");
-    if (!nm->on_scavenge_root_list())
-      CodeCache::add_scavenge_root_nmethod(nm);
+  // It's possible the nmethod was invalidated in the last
+  // safepoint, but if it's still alive then make it not_entrant.
+  nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
+  if (nm != NULL) {
+    nm->make_not_entrant();
   }
 
-  // Now copy code back
-  {
-    MutexLockerEx ml_patch (Patching_lock, Mutex::_no_safepoint_check_flag);
-    //
-    // Deoptimization may have happened while we waited for the lock.
-    // In that case we don't bother to do any patching we just return
-    // and let the deopt happen
-    if (!caller_is_deopted()) {
-      NativeGeneralJump* jump = nativeGeneralJump_at(caller_frame.pc());
-      address instr_pc = jump->jump_destination();
-      NativeInstruction* ni = nativeInstruction_at(instr_pc);
-      if (ni->is_jump() ) {
-        // the jump has not been patched yet
-        address stub_location = caller_frame.pc() + PatchingStub::patch_info_offset();
-        unsigned char* byte_count = (unsigned char*) (stub_location - 1);
-        unsigned char* byte_skip = (unsigned char*) (stub_location - 2);
-        unsigned char* being_initialized_entry_offset = (unsigned char*) (stub_location - 3);
-        address copy_buff = stub_location - *byte_skip - *byte_count;
-        address being_initialized_entry = stub_location - *being_initialized_entry_offset;
-        if (TracePatching) {
-          tty->print_cr(" Patching %s at bci %d at address " INTPTR_FORMAT " (%s)", Bytecodes::name(code), bci,
-                        p2i(instr_pc), (stub_id == Runtime1::access_field_patching_id) ? "field" : "klass");
-          nmethod* caller_code = CodeCache::find_nmethod(caller_frame.pc());
-          assert(caller_code != NULL, "nmethod not found");
-
-          // NOTE we use pc() not original_pc() because we already know they are
-          // identical otherwise we'd have never entered this block of code
-          OopMap* map = caller_code->oop_map_for_return_address(caller_frame.pc());
-          assert(map != NULL, "null check");
-          map->print();
-          tty->cr();
-
-          Disassembler::decode(copy_buff, copy_buff + *byte_count, tty);
-        }
-
-        // The word in the constant pool needs fixing.
-        unsigned insn = *(unsigned*)copy_buff;
-        unsigned long *cpool_addr
-          = (unsigned long *)MacroAssembler::target_addr_for_insn(instr_pc, insn);
-
-        nmethod* nm = CodeCache::find_nmethod(caller_frame.pc());
-        CodeBlob *cb = CodeCache::find_blob(caller_frame.pc());
-        assert(nm != NULL, "invalid nmethod_pc");
-        assert(address(cpool_addr) >= nm->consts_begin()
-               && address(cpool_addr) < nm->consts_end(),
-               "constant address should be inside constant pool");
-
-        switch(stub_id) {
-        case access_field_patching_id:
-          *cpool_addr = patch_field_offset; break;
-        case load_mirror_patching_id:
-          *cpool_addr = cast_from_oop<uint64_t>(mirror()); break;
-        case load_klass_patching_id:
-          *cpool_addr = (uint64_t)load_klass(); break;
-        default:
-          ShouldNotReachHere();
-        }
-
-        // Update the location in the nmethod with the proper
-        // metadata.  When the code was generated, a NULL was stuffed
-        // in the metadata table and that table needs to be update to
-        // have the right value.  On intel the value is kept
-        // directly in the instruction instead of in the metadata
-        // table, so set_data above effectively updated the value.
-        //
-        // FIXME: It's tempting to think that rather them putting OOPs
-        // in the cpool we could refer directly to the locations in the
-        // nmethod.  However, we can't guarantee that an ADRP would be
-        // able to reach them: an ADRP can only reach within +- 4GiB of
-        // the PC using two instructions.  While it's pretty unlikely
-        // that we will exceed this limit, it's not impossible.
-        RelocIterator mds(nm, (address)cpool_addr, (address)cpool_addr + 1);
-        bool found = false;
-        while (mds.next() && !found) {
-          if (mds.type() == relocInfo::oop_type) {
-            assert(stub_id == Runtime1::load_mirror_patching_id, "wrong stub id");
-            oop_Relocation* r = mds.oop_reloc();
-            oop* oop_adr = r->oop_addr();
-            *oop_adr = mirror();
-            r->fix_oop_relocation();
-            found = true;
-          } else if (mds.type() == relocInfo::metadata_type) {
-            assert(stub_id == Runtime1::load_klass_patching_id, "wrong stub id");
-            metadata_Relocation* r = mds.metadata_reloc();
-            Metadata** metadata_adr = r->metadata_addr();
-            *metadata_adr = load_klass();
-            r->fix_metadata_relocation();
-            found = true;
-          }
-        }
-
-        // And we overwrite the jump
-        NativeGeneralJump::replace_mt_safe(instr_pc, copy_buff);
-
-      }
-    }
-  }
+  Deoptimization::deoptimize_frame(thread, caller_frame.id());
 
+  // Return to the now deoptimized frame.
+}
 JRT_END
 
 int Runtime1::access_field_patching(JavaThread* thread) {
diff --git a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp
index c0076c8ce..f72977a28 100644
--- a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp
+++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp
@@ -139,6 +139,7 @@ void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry)
 
   // Update stub.
   method_holder->set_data((intptr_t)callee());
+  method_holder->flush();
   jump->set_jump_destination(entry);
 
   // Update jump to call.
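The method_holder->flush() added above and the wholesale replacement of runtime patching with deoptimization in patch_code_aarch64 stem from the same ARMv8 rule: instructions modified while another core may be executing them behave unpredictably unless both old and new words are B, BL, NOP and friends, and even a plain data write stays invisible to instruction fetch until the instruction cache is invalidated. The standalone sketch below illustrates the safe write-flush-execute pattern; it is a hedged illustration that only runs on AArch64 Linux, hand-assembles the same movz/movk/movk sequence movptr() emits, and uses the GCC/Clang builtin __builtin___clear_cache in place of HotSpot's ICache::invalidate_range.

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/mman.h>

    // Hand-rolled encoders for the 64-bit MOVZ/MOVK forms: hw selects the
    // 16-bit slice, i.e. lsl #(hw*16).
    static uint32_t movz(unsigned rd, uint16_t imm, unsigned hw) {
      return 0xd2800000u | (hw << 21) | ((uint32_t)imm << 5) | rd;
    }
    static uint32_t movk(unsigned rd, uint16_t imm, unsigned hw) {
      return 0xf2800000u | (hw << 21) | ((uint32_t)imm << 5) | rd;
    }

    int main() {
      uint64_t value = 0x1234567890ull;                 // fits in 48 bits
      uint32_t code[4];
      code[0] = movz(0, value & 0xffff, 0);             // movz x0, #lo
      code[1] = movk(0, (value >> 16) & 0xffff, 1);     // movk x0, #mid, lsl #16
      code[2] = movk(0, (value >> 32) & 0xffff, 2);     // movk x0, #hi, lsl #32
      code[3] = 0xd65f03c0u;                            // ret

      void *buf = mmap(nullptr, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      assert(buf != MAP_FAILED);
      memcpy(buf, code, sizeof code);
      // Without this flush, instruction fetch may still see stale bytes:
      // AArch64 does not keep the I-cache coherent with the D-cache.
      __builtin___clear_cache((char *)buf, (char *)buf + sizeof code);

      uint64_t (*fn)() = (uint64_t (*)())buf;
      assert(fn() == value);
      return 0;
    }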
diff --git a/src/cpu/aarch64/vm/globals_aarch64.hpp b/src/cpu/aarch64/vm/globals_aarch64.hpp
index b32553e22..9c2933452 100644
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp
@@ -94,9 +94,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
   product(bool, NearCpool, true,                       \
          "constant pool is close to instructions")     \
                                                        \
-  product(bool, DeoptimizeWhenPatching, true,          \
-          "doptimize instead of patching instructions")\
-                                                       \
   notproduct(bool, UseAcqRelForVolatileFields, false,  \
              "Use acquire and release insns for volatile fields")
@@ -114,9 +111,6 @@ define_pd_global(intx, InlineSmallCode, 1000);
   product(bool, NearCpool, true,                       \
          "constant pool is close to instructions")     \
                                                        \
-  product(bool, DeoptimizeWhenPatching, true,          \
-          "doptimize instead of patching instructions")\
-                                                       \
   notproduct(bool, UseAcqRelForVolatileFields, false,  \
              "Use acquire and release insns for volatile fields") \
   product(bool, UseNeon, false,                        \
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
index 0b308f2b1..3f9c85ef0 100644
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@@ -65,6 +65,7 @@
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
 void MacroAssembler::pd_patch_instruction(address branch, address target) {
+  assert((uint64_t)target < (1ul << 48), "48-bit overflow in address constant");
   long offset = (target - branch) >> 2;
   unsigned insn = *(unsigned*)branch;
   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b111011) == 0b011000) {
@@ -136,13 +137,15 @@ void MacroAssembler::pd_patch_instruction(address branch, address target) {
     offset >>= 2;
     Instruction_aarch64::spatch(branch, 23, 5, offset);
     Instruction_aarch64::patch(branch, 30, 29, offset_lo);
-  } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
-    // Move wide constant
+  } else if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010100) {
     u_int64_t dest = (u_int64_t)target;
+    // Move wide constant
+    assert(nativeInstruction_at(branch+4)->is_movk(), "wrong insns in patch");
+    assert(nativeInstruction_at(branch+8)->is_movk(), "wrong insns in patch");
     Instruction_aarch64::patch(branch, 20, 5, dest & 0xffff);
-    Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16) & 0xffff);
-    Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16) & 0xffff);
-    Instruction_aarch64::patch(branch += 4, 20, 5, (dest >>= 16));
+    Instruction_aarch64::patch(branch+4, 20, 5, (dest >>= 16) & 0xffff);
+    Instruction_aarch64::patch(branch+8, 20, 5, (dest >>= 16) & 0xffff);
+    assert(pd_call_destination(branch) == target, "should be");
   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
     // nothing to do
@@ -152,6 +155,19 @@ void MacroAssembler::pd_patch_instruction(address branch, address target) {
   }
 }
 
+void MacroAssembler::patch_oop(address insn_addr, address o) {
+  unsigned insn = *(unsigned*)insn_addr;
+  if (Instruction_aarch64::extract(insn, 31, 21) == 0b11010010101) {
+    // Move narrow constant
+    assert(nativeInstruction_at(insn_addr+4)->is_movk(), "wrong insns in patch");
+    narrowOop n = oopDesc::encode_heap_oop((oop)o);
+    Instruction_aarch64::patch(insn_addr, 20, 5, n >> 16);
+    Instruction_aarch64::patch(insn_addr+4, 20, 5, n & 0xffff);
+  } else {
+    pd_patch_instruction(insn_addr, o);
+  }
+}
+
 address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
   long offset = 0;
   if ((Instruction_aarch64::extract(insn, 29, 24) & 0b011011) == 0b00011000) {
@@ -216,14 +232,13 @@ address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
       ShouldNotReachHere();
     }
   } else if (Instruction_aarch64::extract(insn, 31, 23) == 0b110100101) {
-    // Move wide constant
-    // FIXME: We assume these instructions are movz, movk, movk, movk.
-    // We don't assert this; we should.
     u_int32_t *insns = (u_int32_t *)insn_addr;
+    // Move wide constant: movz, movk, movk.  See movptr().
+    assert(nativeInstruction_at(insns+1)->is_movk(), "wrong insns in patch");
+    assert(nativeInstruction_at(insns+2)->is_movk(), "wrong insns in patch");
     return address(u_int64_t(Instruction_aarch64::extract(insns[0], 20, 5))
                    + (u_int64_t(Instruction_aarch64::extract(insns[1], 20, 5)) << 16)
-                   + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32)
-                   + (u_int64_t(Instruction_aarch64::extract(insns[3], 20, 5)) << 48));
+                   + (u_int64_t(Instruction_aarch64::extract(insns[2], 20, 5)) << 32));
   } else if (Instruction_aarch64::extract(insn, 31, 22) == 0b1011100101 &&
              Instruction_aarch64::extract(insn, 4, 0) == 0b11111) {
     return 0;
@@ -607,9 +622,10 @@ void MacroAssembler::call(Address entry) {
 
 void MacroAssembler::ic_call(address entry) {
   RelocationHolder rh = virtual_call_Relocation::spec(pc());
-  address const_ptr = long_constant((jlong)Universe::non_oop_word());
-  unsigned long offset;
-  ldr_constant(rscratch2, const_ptr);
+  // address const_ptr = long_constant((jlong)Universe::non_oop_word());
+  // unsigned long offset;
+  // ldr_constant(rscratch2, const_ptr);
+  movptr(rscratch2, (uintptr_t)Universe::non_oop_word());
   call(Address(entry, rh));
 }
@@ -1246,10 +1262,14 @@ void MacroAssembler::mov(Register r, Address dest) {
   InstructionMark im(this);
   code_section()->relocate(inst_mark(), dest.rspec());
   u_int64_t imm64 = (u_int64_t)dest.target();
-  mov64(r, imm64);
+  movptr(r, imm64);
 }
 
-void MacroAssembler::mov64(Register r, uintptr_t imm64) {
+// Move a constant pointer into r.  In AArch64 mode the virtual
+// address space is 48 bits in size, so we only need three
+// instructions to create a patchable instruction sequence that can
+// reach anywhere.
+void MacroAssembler::movptr(Register r, uintptr_t imm64) {
 #ifndef PRODUCT
   {
     char buffer[64];
@@ -1257,13 +1277,12 @@
     block_comment(buffer);
   }
 #endif
+  assert(imm64 < (1ul << 48), "48-bit overflow in address constant");
   movz(r, imm64 & 0xffff);
   imm64 >>= 16;
   movk(r, imm64 & 0xffff, 16);
   imm64 >>= 16;
   movk(r, imm64 & 0xffff, 32);
-  imm64 >>= 16;
-  movk(r, imm64 & 0xffff, 48);
 }
 
 void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64)
@@ -2704,29 +2723,33 @@ void MacroAssembler::decode_klass_not_null(Register r) {
   decode_klass_not_null(r, r);
 }
 
-// TODO
-//
-// these next two methods load a narrow oop or klass constant into a
-// register.  they currently do the dumb thing of installing 64 bits of
-// unencoded constant into the register and then encoding it.
-// installing the encoded 32 bit constant directly requires updating
-// the relocation code so it can recognize that this is a 32 bit load
-// rather than a 64 bit load.
-
 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
-  assert (UseCompressedOops, "should only be used for compressed headers");
+  assert (UseCompressedOops, "should only be used for compressed oops");
   assert (Universe::heap() != NULL, "java heap should be initialized");
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  movoop(dst, obj);
-  encode_heap_oop_not_null(dst);
-}
+  int oop_index = oop_recorder()->find_index(obj);
+  assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
+
+  InstructionMark im(this);
+  RelocationHolder rspec = oop_Relocation::spec(oop_index);
+  code_section()->relocate(inst_mark(), rspec);
+  movz(dst, 0xDEAD, 16);
+  movk(dst, 0xBEEF);
+}
 
 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
   assert (UseCompressedClassPointers, "should only be used for compressed headers");
   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
-  mov_metadata(dst, k);
-  encode_klass_not_null(dst);
+  int index = oop_recorder()->find_index(k);
+  assert(! Universe::heap()->is_in_reserved(k), "should not be an oop");
+
+  InstructionMark im(this);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  code_section()->relocate(inst_mark(), rspec);
+  narrowKlass nk = Klass::encode_klass(k);
+  movz(dst, (nk >> 16), 16);
+  movk(dst, nk & 0xffff);
 }
@@ -2736,7 +2759,7 @@ void MacroAssembler::load_heap_oop(Register dst, Address src)
     decode_heap_oop(dst);
   } else {
     ldr(dst, src);
-  } 
+  }
 }
@@ -2948,7 +2971,11 @@ Address MacroAssembler::allocate_metadata_address(Metadata* obj) {
   return Address((address)obj, rspec);
 }
 
-void MacroAssembler::movoop(Register dst, jobject obj) {
+// Move an oop into a register.  immediate is true if we want
+// immediate instrcutions, i.e. we are not going to patch this
+// instruction while the code is being executed by another thread.  In
+// that case we can use move immediates rather than the constant pool.
+void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
   int oop_index;
   if (obj == NULL) {
     oop_index = oop_recorder()->allocate_oop_index(obj);
@@ -2957,15 +2984,14 @@ void MacroAssembler::movoop(Register dst, jobject obj) {
     assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "should be real oop");
   }
   RelocationHolder rspec = oop_Relocation::spec(oop_index);
-  address const_ptr = long_constant((jlong)obj);
-  if (! const_ptr) {
+  if (! immediate) {
+    address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
+    ldr_constant(dst, Address(dummy, rspec));
+  } else
     mov(dst, Address((address)obj, rspec));
-  } else {
-    code()->consts()->relocate(const_ptr, rspec);
-    ldr_constant(dst, const_ptr);
-  }
 }
 
+// Move a metadata address into a register.
 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
   int oop_index;
   if (obj == NULL) {
@@ -2974,13 +3000,7 @@ void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
     oop_index = oop_recorder()->find_index(obj);
   }
   RelocationHolder rspec = metadata_Relocation::spec(oop_index);
-  address const_ptr = long_constant((jlong)obj);
-  if (! const_ptr) {
-    mov(dst, Address((address)obj, rspec));
-  } else {
-    code()->consts()->relocate(const_ptr, rspec);
-    ldr_constant(dst, const_ptr);
-  }
+  mov(dst, Address((address)obj, rspec));
 }
 
 Address MacroAssembler::constant_oop_address(jobject obj) {
@@ -3268,12 +3288,12 @@ address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype
 
 void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
   relocInfo::relocType rtype = dest.rspec().reloc()->type();
-  guarantee(rtype == relocInfo::none
-            || rtype == relocInfo::external_word_type
-            || rtype == relocInfo::poll_type
-            || rtype == relocInfo::poll_return_type,
-            "can only use a fixed address with an ADRP");
   if (labs(pc() - dest.target()) >= (1LL << 32)) {
+    guarantee(rtype == relocInfo::none
+              || rtype == relocInfo::external_word_type
+              || rtype == relocInfo::poll_type
+              || rtype == relocInfo::poll_return_type,
+              "can only use a fixed address with an ADRP");
     // Out of range.  This doesn't happen very often, but we have to
     // handle it
     mov(reg1, dest);
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
index c9a0ad21d..c3ebea34b 100644
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
@@ -406,6 +406,8 @@ private:
   int push(unsigned int bitset, Register stack);
   int pop(unsigned int bitset, Register stack);
 
+  void mov(Register dst, Address a);
+
 public:
   int push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
   int pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }
@@ -438,8 +440,7 @@ public:
     mov(dst, (long)i);
   }
 
-  void mov(Register dst, Address a);
-  void mov64(Register r, uintptr_t imm64);
+  void movptr(Register r, uintptr_t imm64);
 
   // macro instructions for accessing and updating floating point
   // status register
@@ -493,6 +494,8 @@ public:
   static void pd_print_patched_instruction(address branch);
 #endif
 
+  static void patch_oop(address insn_addr, address o);
+
   // The following 4 methods return the offset of the appropriate move instruction
 
   // Support for fast byte/short loading with zero extension (depending on particular CPU)
@@ -1111,7 +1114,7 @@ public:
   void pushoop(jobject obj);
 #endif
 
-  void movoop(Register dst, jobject obj);
+  void movoop(Register dst, jobject obj, bool immediate = false);
 
   // sign extend as need a l to ptr sized element
   void movl2ptr(Register dst, Address src) { Unimplemented(); }
@@ -1253,13 +1256,12 @@ public:
                  Label* retaddr = NULL
   );
 
-  void ldr_constant(Register dest, address const_addr) {
-    guarantee(const_addr, "constant pool overflow");
+  void ldr_constant(Register dest, const Address &const_addr) {
     if (NearCpool) {
-      ldr(dest, const_addr, relocInfo::internal_word_type);
+      ldr(dest, const_addr);
     } else {
       unsigned long offset;
-      adrp(dest, InternalAddress(const_addr), offset);
+      adrp(dest, InternalAddress(const_addr.target()), offset);
       ldr(dest, Address(dest, offset));
     }
   }
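set_narrow_oop() above plants a recognizable movz/movk placeholder (0xDEAD in the high half, 0xBEEF in the low) and records a relocation; patch_oop() later overwrites the two imm16 fields with the real 32-bit compressed oop — high half in the movz (lsl #16), low half in the movk. A standalone sketch of that field surgery follows; set_imm16/get_imm16 are hypothetical stand-ins for Instruction_aarch64::patch/extract over bits 20..5.

    #include <assert.h>
    #include <stdint.h>

    // Insert/extract the imm16 field (bits 20..5) of a MOVZ/MOVK word.
    static uint32_t set_imm16(uint32_t insn, uint32_t val) {
      return (insn & ~(0xffffu << 5)) | ((val & 0xffffu) << 5);
    }
    static uint32_t get_imm16(uint32_t insn) { return (insn >> 5) & 0xffffu; }

    int main() {
      // The placeholder pair emitted by set_narrow_oop:
      //   movz dst, #0xDEAD, lsl #16  ;  movk dst, #0xBEEF
      uint32_t insns[2] = { 0xd2a00000u | (0xDEADu << 5),    // movz, hw=1
                            0xf2800000u | (0xBEEFu << 5) };  // movk, hw=0

      // What patch_oop does once the real compressed oop is known:
      uint32_t narrow = 0x0badcafeu;                   // a made-up narrowOop
      insns[0] = set_imm16(insns[0], narrow >> 16);    // high half
      insns[1] = set_imm16(insns[1], narrow & 0xffff); // low half

      // Executing the pair reassembles the value; decode it the same way.
      uint32_t decoded = (get_imm16(insns[0]) << 16) | get_imm16(insns[1]);
      assert(decoded == narrow);
      return 0;
    }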
diff --git a/src/cpu/aarch64/vm/nativeInst_aarch64.cpp b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp
index 0ff5edf1c..30112d1df 100644
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.cpp
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.cpp
@@ -53,13 +53,6 @@ void NativeCall::print() { Unimplemented(); }
 // Inserts a native call instruction at a given pc
 void NativeCall::insert(address code_pos, address entry) { Unimplemented(); }
 
-// MT-safe patching of a call instruction.
-// First patches first word of instruction to two jmp's that jmps to them
-// selfs (spinlock). Then patches the last byte, and then atomicly replaces
-// the jmp's with the first 4 byte of the new instruction.
-void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { Unimplemented(); }
-
-
 void NativeMovConstReg::verify() {
   // make sure code pattern is actually mov reg64, imm64 instructions
 }
@@ -83,7 +76,6 @@ void NativeMovConstReg::set_data(intptr_t x) {
   }
 };
 
-
 void NativeMovConstReg::print() {
   tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT,
                 p2i(instruction_address()), data());
 }
@@ -207,6 +199,14 @@ bool NativeInstruction::is_ldrw_to_zr(address instr) {
           Instruction_aarch64::extract(insn, 4, 0) == 0b11111);
 }
 
+bool NativeInstruction::is_movz() {
+  return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b10100101;
+}
+
+bool NativeInstruction::is_movk() {
+  return Instruction_aarch64::extract(int_at(0), 30, 23) == 0b11100101;
+}
+
 // MT safe inserting of a jump over an unknown instruction sequence (used by nmethod::makeZombie)
 void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) {
@@ -242,8 +242,7 @@ void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
 
 // MT-safe patching of a long jump instruction.
 void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
-  assert((! DeoptimizeWhenPatching)
-         || nativeInstruction_at(instr_addr)->is_jump_or_nop(),
+  assert(nativeInstruction_at(instr_addr)->is_jump_or_nop(),
          "Aarch64 cannot replace non-jump with jump");
   uint32_t instr = *(uint32_t*)code_buffer;
   *(uint32_t*)instr_addr = instr;
 }
diff --git a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp
index f9b485937..a84a768ed 100644
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp
@@ -65,6 +65,8 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
   inline bool is_cond_jump();
   bool is_safepoint_poll();
   inline bool is_mov_literal64();
+  bool is_movz();
+  bool is_movk();
 
 protected:
   address addr_at(int offset) const { return address(this) + offset; }
@@ -105,11 +107,12 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
 };
 
 inline NativeInstruction* nativeInstruction_at(address address) {
-  NativeInstruction* inst = (NativeInstruction*)address;
-#ifdef ASSERT
-  //inst->verify();
-#endif
-  return inst;
+  return (NativeInstruction*)address;
+}
+
+// The natural type of an AArch64 instruction is uint32_t
+inline NativeInstruction* nativeInstruction_at(uint32_t *address) {
+  return (NativeInstruction*)address;
 }
 
 inline NativeCall* nativeCall_at(address address);
@@ -199,29 +202,37 @@ inline NativeCall* nativeCall_before(address return_address) {
   return call;
 }
 
-// An interface for accessing/manipulating native mov reg, imm32 instructions.
-// (used to manipulate inlined 32bit data dll calls, etc.)
+// An interface for accessing/manipulating native mov reg, imm instructions.
+// (used to manipulate inlined 64-bit data calls, etc.)
 class NativeMovConstReg: public NativeInstruction {
  public:
   enum Aarch64_specific_constants {
-    instruction_size = 4 * 4,
+    instruction_size = 3 * 4, // movz, movk, movk.  See movptr().
     instruction_offset = 0,
     displacement_offset = 0,
   };
 
   address instruction_address() const { return addr_at(instruction_offset); }
 
   address next_instruction_address() const {
-    if (is_adrp_at(instruction_address()))
+    if (nativeInstruction_at(instruction_address())->is_movz())
+      // Assume movz, movk, movk
+      return addr_at(instruction_size);
+    else if (is_adrp_at(instruction_address()))
       return addr_at(2*4);
     else if (is_ldr_literal_at(instruction_address()))
       return(addr_at(4));
-    else
-      return addr_at(instruction_size);
+    assert(false, "Unknown instruction in NativeMovConstReg");
   }
 
   intptr_t data() const;
   void set_data(intptr_t x);
 
+  void flush() {
+    if (! maybe_cpool_ref(instruction_address())) {
+      ICache::invalidate_range(instruction_address(), instruction_size);
+    }
+  }
+
   void verify();
   void print();
diff --git a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp
index 28d2c6f21..12c5bc974 100644
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp
@@ -33,10 +33,25 @@
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
-  MacroAssembler::pd_patch_instruction(addr(), x);
+  switch(type()) {
+  case relocInfo::oop_type:
+    {
+      oop_Relocation *reloc = (oop_Relocation *)this;
+      if (NativeInstruction::is_ldr_literal_at(addr())) {
+        address constptr = (address)code()->oop_addr_at(reloc->oop_index());
+        MacroAssembler::pd_patch_instruction(addr(), constptr);
+        assert(*(address*)constptr == x, "error in oop relocation");
+      } else{
+        MacroAssembler::patch_oop(addr(), x);
+      }
+    }
+    break;
+  default:
+    MacroAssembler::pd_patch_instruction(addr(), x);
+    break;
+  }
 }
 
-
 address Relocation::pd_call_destination(address orig_addr) {
   if (orig_addr != NULL) {
     return MacroAssembler::pd_call_destination(orig_addr);
@@ -75,78 +90,3 @@ void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, Co
 
 void metadata_Relocation::pd_fix_value(address x) {
 }
-
-// We have a relocation that points to a pair of instructions that
-// load a constant from the constant pool.  These are
-// ARDP; LDR reg [reg, #ofs].  However, until the constant is resolved
-// the first instruction may be a branch to a resolver stub, and the
-// resolver stub contains a copy of the ADRP that will replace the
-// branch instruction.
-//
-// So, when we relocate this code we have to adjust the offset in the
-// LDR instruction and the page offset in the copy of the ADRP
-// instruction that will overwrite the branch instruction.  This is
-// done by Runtime1::patch_code_aarch64.
-
-void section_word_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
-  unsigned insn1 = *(unsigned*)addr();
-  if (! (Instruction_aarch64::extract(insn1, 30, 26) == 0b00101)) {
-    // Unconditional branch (immediate)
-    internal_word_Relocation::fix_relocation_after_move(src, dest);
-    return;
-  }
-
-  address new_address = target();
-#ifdef ASSERT
-  // Make sure this really is a cpool address
-  address old_cpool_start = const_cast<CodeBuffer*>(src)->consts()->start();
-  address old_cpool_end = const_cast<CodeBuffer*>(src)->consts()->end();
-  address new_cpool_start = const_cast<CodeBuffer*>(dest)->consts()->start();
-  address new_cpool_end = const_cast<CodeBuffer*>(dest)->consts()->end();
-  address old_address = old_addr_for(target(), src, dest);
-  assert(new_address >= new_cpool_start
-         && new_address < new_cpool_end,
-         "should be");
-  assert(old_address >= old_cpool_start
-         && old_address < old_cpool_end,
-         "should be");
-#endif
-
-  address stub_location = pd_call_destination(addr());
-  unsigned char* byte_count = (unsigned char*) (stub_location - 1);
-  unsigned char* byte_skip = (unsigned char*) (stub_location - 2);
-  address copy_buff = stub_location - *byte_skip - *byte_count;
-  unsigned insn3 = *(unsigned*)copy_buff;
-
-  if (NearCpool) {
-    int offset = new_address - addr();
-    Instruction_aarch64::spatch(copy_buff, 23, 5, offset >> 2);
-  } else {
-    // Unconditional branch (immediate)
-    unsigned insn2 = ((unsigned*)addr())[1];
-    if (Instruction_aarch64::extract(insn2, 29, 24) == 0b111001) {
-      // Load/store register (unsigned immediate)
-      unsigned size = Instruction_aarch64::extract(insn2, 31, 30);
-
-      // Offset of address in a 4k page
-      uint64_t new_offset = (uint64_t)target() & ((1<<12) - 1);
-      // Fix the LDR instruction's offset
-      Instruction_aarch64::patch(addr() + sizeof (unsigned),
-                                 21, 10, new_offset >> size);
-
-      assert(Instruction_aarch64::extract(insn3, 28, 24) == 0b10000
-             && Instruction_aarch64::extract(insn3, 31, 31),
-             "instruction should be an ADRP");
-
-      uint64_t insn_page = (uint64_t)addr() >> 12;
-      uint64_t target_page = (uint64_t)target() >> 12;
-      int page_offset = target_page - insn_page;
-      int page_offset_lo = page_offset & 3;
-      page_offset >>= 2;
-      Instruction_aarch64::spatch(copy_buff, 23, 5, page_offset);
-      Instruction_aarch64::patch(copy_buff, 30, 29, page_offset_lo);
-
-      // Phew.
-    }
-  }
-}
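The new is_movz()/is_movk() predicates used by the asserts above match on bits 30..23 of the instruction word and deliberately ignore bit 31 (the sf size bit), so they accept both the 32-bit and 64-bit register forms. A standalone sketch, with a local extract() mirroring Instruction_aarch64::extract:

    #include <assert.h>
    #include <stdint.h>

    // Pull out the inclusive bitfield [msb..lsb] of an instruction word,
    // as Instruction_aarch64::extract does.
    static uint32_t extract(uint32_t insn, int msb, int lsb) {
      return (insn >> lsb) & ((1u << (msb - lsb + 1)) - 1);
    }

    static bool is_movz(uint32_t insn) { return extract(insn, 30, 23) == 0b10100101; }
    static bool is_movk(uint32_t insn) { return extract(insn, 30, 23) == 0b11100101; }

    int main() {
      assert(is_movz(0xd2800000u));   // movz x0, #0
      assert(is_movk(0xf2800000u));   // movk x0, #0
      assert(!is_movk(0xd2800000u) && !is_movz(0xf2800000u));
      assert(is_movz(0x52800000u));   // movz w0, #0 -- sf bit ignored
      return 0;
    }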
diff --git a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
index 87757db68..61d11f09f 100644
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp
@@ -316,7 +316,7 @@ static void patch_callers_callsite(MacroAssembler *masm) {
 
   __ mov(c_rarg0, rmethod);
   __ mov(c_rarg1, lr);
-  __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
   __ blrt(rscratch1, 2, 0, 0);
 
   __ pop_CPU_state();
@@ -1168,7 +1168,7 @@ static void rt_call(MacroAssembler* masm, address dest, int gpargs, int fpargs,
   } else {
     assert((unsigned)gpargs < 256, "eek!");
     assert((unsigned)fpargs < 32, "eek!");
-    __ mov(rscratch1, RuntimeAddress(dest));
+    __ lea(rscratch1, RuntimeAddress(dest));
     __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
     __ blrt(rscratch1, rscratch2);
     // __ blrt(rscratch1, gpargs, fpargs, type);
@@ -1735,7 +1735,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 
   if (method->is_static() && !is_critical_native) {
 
     // load oop into a register
-    __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror()));
+    __ movoop(oop_handle_reg,
+              JNIHandles::make_local(method->method_holder()->java_mirror()),
+              /*immediate*/true);
 
     // Now handlize the static class mirror it's known not-null.
     __ str(oop_handle_reg, Address(sp, klass_offset));
@@ -1965,9 +1967,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
     assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
 #endif
     if (!is_critical_native) {
-      __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
+      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
     } else {
-      __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
+      __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
     }
     __ blrt(rscratch1, 1, 0, 1);
     // Restore any method result value
@@ -2388,7 +2390,7 @@ void SharedRuntime::generate_deopt_blob() {
   }
 #endif // ASSERT
   __ mov(c_rarg0, rthread);
-  __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
@@ -2518,7 +2520,7 @@ void SharedRuntime::generate_deopt_blob() {
   __ mov(c_rarg0, rthread);
   __ movw(c_rarg1, rcpool); // second arg: exec_mode
-  __ mov(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
+  __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
   __ blrt(rscratch1, 2, 0, 0);
 
   // Set an oopmap for the call site
@@ -2871,7 +2873,7 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const cha
   __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
 
   __ mov(c_rarg0, rthread);
-  __ mov(rscratch1, RuntimeAddress(destination));
+  __ lea(rscratch1, RuntimeAddress(destination));
 
   __ blrt(rscratch1, 1, 0, 1);
   __ bind(retaddr);
diff --git a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
index dd9523dc3..a669c1ce5 100644
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp
@@ -754,10 +754,10 @@ class StubGenerator: public StubCodeGenerator {
   // Stack after saving c_rarg3:
   //    [tos + 0]: saved c_rarg3
   //    [tos + 1]: saved c_rarg2
-  //    [tos + 2]: saved rscratch2
-  //    [tos + 3]: saved lr
-  //    [tos + 4]: saved rscratch1
-  //    [tos + 5]: saved r0
+  //    [tos + 2]: saved lr
+  //    [tos + 3]: saved rscratch2
+  //    [tos + 4]: saved r0
+  //    [tos + 5]: saved rscratch1
   address generate_verify_oop() {
 
     StubCodeMark mark(this, "StubRoutines", "verify_oop");
@@ -765,9 +765,6 @@ class StubGenerator: public StubCodeGenerator {
 
     Label exit, error;
 
-    // __ pushf();
-    // __ push(r12);
-
     // save c_rarg2 and c_rarg3
     __ stp(c_rarg3, c_rarg2, Address(__ pre(sp, -16)));
@@ -807,21 +804,15 @@ class StubGenerator: public StubCodeGenerator {
     __ push(RegSet::range(r0, r29), sp);
 
     // debug(char* msg, int64_t pc, int64_t regs[])
-    __ ldr(c_rarg0, Address(sp, rscratch1->encoding()));    // pass address of error message
-    __ mov(c_rarg1, Address(sp, lr));                       // pass return address
-    __ mov(c_rarg2, sp);                                    // pass address of regs on stack
+    __ mov(c_rarg0, rscratch1);      // pass address of error message
+    __ mov(c_rarg1, lr);             // pass return address
+    __ mov(c_rarg2, sp);             // pass address of regs on stack
 #ifndef PRODUCT
     assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
 #endif
     BLOCK_COMMENT("call MacroAssembler::debug");
     __ mov(rscratch1, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
     __ blrt(rscratch1, 3, 0, 1);
-    __ pop(RegSet::range(r0, r29), sp);
-
-    __ ldp(rscratch2, lr, Address(__ post(sp, 2 * wordSize)));
-    __ ldp(r0, rscratch1, Address(__ post(sp, 2 * wordSize)));
-
-    __ ret(lr);
 
     return start;
   }
diff --git a/src/cpu/aarch64/vm/templateTable_aarch64.cpp b/src/cpu/aarch64/vm/templateTable_aarch64.cpp
index 4c8fab47b..d2e717e0f 100644
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp
@@ -2431,7 +2431,7 @@ void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is
     // we take the time to call into the VM.
     Label L1;
     assert_different_registers(cache, index, r0);
-    __ mov(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
+    __ lea(rscratch1, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr()));
     __ ldrw(r0, Address(rscratch1));
     __ cbz(r0, L1);
diff --git a/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp
index ecd0d51d9..3912f0276 100644
--- a/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp
+++ b/src/os_cpu/linux_aarch64/vm/os_linux_aarch64.cpp
@@ -100,7 +100,7 @@ char* os::non_memory_address_word() {
   // even in its subfields (as defined by the CPU immediate fields,
   // if the CPU splits constants across multiple instructions).
 
-  return (char*) -1;
+  return (char*) 0xffffffffffff;
 }
 
 void os::initialize_thread(Thread *thr) {
diff --git a/src/share/vm/code/relocInfo.cpp b/src/share/vm/code/relocInfo.cpp
index e5fac02c8..4d09a3276 100644
--- a/src/share/vm/code/relocInfo.cpp
+++ b/src/share/vm/code/relocInfo.cpp
@@ -147,11 +147,6 @@ void RelocIterator::initialize(nmethod* nm, address begin, address limit) {
   _section_end [CodeBuffer::SECT_STUBS ] = nm->stub_end() ;
 
   assert(!has_current(), "just checking");
-#ifndef TARGET_ARCH_aarch64
-  // aarch64 has relocs in the cpool
-  assert(begin == NULL || begin >= nm->code_begin(), "in bounds");
-  assert(limit == NULL || limit <= nm->code_end(), "in bounds");
-#endif
   set_limits(begin, limit);
 }
diff --git a/src/share/vm/code/relocInfo.hpp b/src/share/vm/code/relocInfo.hpp
index db9a14437..ad55a2fd9 100644
--- a/src/share/vm/code/relocInfo.hpp
+++ b/src/share/vm/code/relocInfo.hpp
@@ -1307,10 +1307,6 @@ class section_word_Relocation : public internal_word_Relocation {
   //void pack_data_to -- inherited
   void unpack_data();
 
-#ifdef TARGET_ARCH_aarch64
-  void fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest);
-#endif
-
  private:
   friend class RelocIterator;
   section_word_Relocation() { }
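Taken as a pair, movptr() and the trimmed target_addr_for_insn() are an encoder and decoder for the same three-instruction pattern, and the new asserts check that the pattern really is movz, movk, movk before any field is touched. A closing standalone round-trip sketch — hand-rolled helpers with hypothetical names, not HotSpot's API:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t mov_wide(uint32_t base, unsigned rd, uint64_t imm, unsigned hw) {
      return base | (hw << 21) | (uint32_t)((imm & 0xffff) << 5) | rd;
    }

    // What movptr() emits: three insns cover a 48-bit virtual address.
    static void encode_movptr(uint32_t insns[3], unsigned rd, uint64_t imm) {
      assert(imm < (1ull << 48));  // "48-bit overflow in address constant"
      insns[0] = mov_wide(0xd2800000u, rd, imm, 0);        // movz
      insns[1] = mov_wide(0xf2800000u, rd, imm >> 16, 1);  // movk, lsl #16
      insns[2] = mov_wide(0xf2800000u, rd, imm >> 32, 2);  // movk, lsl #32
    }

    // What target_addr_for_insn() now reads back (insns[3] no longer exists).
    static uint64_t decode_movptr(const uint32_t insns[3]) {
      return  (uint64_t)((insns[0] >> 5) & 0xffff)
           | ((uint64_t)((insns[1] >> 5) & 0xffff) << 16)
           | ((uint64_t)((insns[2] >> 5) & 0xffff) << 32);
    }

    int main() {
      uint32_t insns[3];
      uint64_t target = 0x00007fdeadbeef00ull;  // a plausible 48-bit VA
      encode_movptr(insns, 1, target);
      assert(decode_movptr(insns) == target);
      return 0;
    }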