diff options
author | aph <none@none> | 2014-07-22 08:45:24 -0400 |
---|---|---|
committer | aph <none@none> | 2014-07-22 08:45:24 -0400 |
commit | 3965f786fc6334099e4278d1c298fe9955ad49f1 (patch) | |
tree | 17ff6f1f2a9144c7b9dff2cd4ac27167a3a266fa | |
parent | c25c392c6fadc9264361b95ea81f5bd8eaa146c5 (diff) |
Remove "v_" prefixes from all SIMD instructions.
-rw-r--r-- | src/cpu/aarch64/vm/assembler_aarch64.hpp | 116 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/macroAssembler_aarch64.cpp | 212 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/macroAssembler_aarch64.hpp | 2 | ||||
-rw-r--r-- | src/cpu/aarch64/vm/stubGenerator_aarch64.cpp | 388 |
4 files changed, 360 insertions, 358 deletions
diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp index a39e8c840..ac7649d0d 100644 --- a/src/cpu/aarch64/vm/assembler_aarch64.hpp +++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp @@ -1857,14 +1857,14 @@ public: S32, D64, Q128 }; -void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) +void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) { starti; f(0,31), f((int)T & 1, 30); f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12); f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); } -void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, +void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm, int op1, int op2) { starti; @@ -1872,7 +1872,7 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12); f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0); } -void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, +void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm, int op1, int op2) { starti; @@ -1883,90 +1883,90 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, #define INSN1(NAME, op1, op2) \ void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn) { \ - v_ld_st(Vt, T, Xn, op1, op2); \ + ld_st(Vt, T, Xn, op1, op2); \ } \ void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int imm) { \ - v_ld_st(Vt, T, Xn, imm, op1, op2); \ + ld_st(Vt, T, Xn, imm, op1, op2); \ } \ void NAME(FloatRegister Vt, SIMD_Arrangement T, Register Xn, Register Xm) { \ - v_ld_st(Vt, T, Xn, Xm, op1, op2); \ + ld_st(Vt, T, Xn, Xm, op1, op2); \ } #define INSN2(NAME, op1, op2) \ void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn) { \ assert(Vt->successor() == Vt2, "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, op1, op2); \ + ld_st(Vt, T, Xn, op1, op2); \ } \ void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, \ int imm) { \ assert(Vt->successor() == Vt2, "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, imm, op1, op2); \ + ld_st(Vt, T, Xn, imm, op1, op2); \ } \ void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, Register Xn, \ Register Xm) { \ assert(Vt->successor() == Vt2, "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, Xm, op1, op2); \ + ld_st(Vt, T, Xn, Xm, op1, op2); \ } #define INSN3(NAME, op1, op2) \ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ SIMD_Arrangement T, Register Xn) { \ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, op1, op2); \ + ld_st(Vt, T, Xn, op1, op2); \ } \ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ SIMD_Arrangement T, Register Xn, int imm) { \ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, imm, op1, op2); \ + ld_st(Vt, T, Xn, imm, op1, op2); \ } \ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ SIMD_Arrangement T, Register Xn, Register Xm) { \ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, Xm, op1, op2); \ + ld_st(Vt, T, Xn, Xm, op1, op2); \ } #define INSN4(NAME, op1, op2) \ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ FloatRegister Vt4, SIMD_Arrangement T, Register Xn) { \ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ Vt3->successor() == Vt4, "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, op1, op2); \ + ld_st(Vt, T, Xn, op1, op2); \ } \ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ FloatRegister Vt4, SIMD_Arrangement T, Register Xn, int imm) { \ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ Vt3->successor() == Vt4, "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, imm, op1, op2); \ + ld_st(Vt, T, Xn, imm, op1, op2); \ } \ void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ FloatRegister Vt4, SIMD_Arrangement T, Register Xn, Register Xm) { \ assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ Vt3->successor() == Vt4, "Registers must be ordered"); \ - v_ld_st(Vt, T, Xn, Xm, op1, op2); \ + ld_st(Vt, T, Xn, Xm, op1, op2); \ } - INSN1(v_ld1, 0b001100010, 0b0111); - INSN2(v_ld1, 0b001100010, 0b1010); - INSN3(v_ld1, 0b001100010, 0b0110); - INSN4(v_ld1, 0b001100010, 0b0010); + INSN1(ld1, 0b001100010, 0b0111); + INSN2(ld1, 0b001100010, 0b1010); + INSN3(ld1, 0b001100010, 0b0110); + INSN4(ld1, 0b001100010, 0b0010); - INSN2(v_ld2, 0b001100010, 0b1000); - INSN3(v_ld3, 0b001100010, 0b0100); - INSN4(v_ld4, 0b001100010, 0b0000); + INSN2(ld2, 0b001100010, 0b1000); + INSN3(ld3, 0b001100010, 0b0100); + INSN4(ld4, 0b001100010, 0b0000); - INSN1(v_st1, 0b001100000, 0b0111); - INSN2(v_st1, 0b001100000, 0b1010); - INSN3(v_st1, 0b001100000, 0b0110); - INSN4(v_st1, 0b001100000, 0b0010); + INSN1(st1, 0b001100000, 0b0111); + INSN2(st1, 0b001100000, 0b1010); + INSN3(st1, 0b001100000, 0b0110); + INSN4(st1, 0b001100000, 0b0010); - INSN2(v_st2, 0b001100000, 0b1000); - INSN3(v_st3, 0b001100000, 0b0100); - INSN4(v_st4, 0b001100000, 0b0000); + INSN2(st2, 0b001100000, 0b1000); + INSN3(st3, 0b001100000, 0b0100); + INSN4(st4, 0b001100000, 0b0000); - INSN1(v_ld1r, 0b001101010, 0b1100); - INSN2(v_ld2r, 0b001101011, 0b1100); - INSN3(v_ld3r, 0b001101010, 0b1110); - INSN4(v_ld4r, 0b001101011, 0b1110); + INSN1(ld1r, 0b001101010, 0b1100); + INSN2(ld2r, 0b001101011, 0b1100); + INSN3(ld3r, 0b001101010, 0b1110); + INSN4(ld4r, 0b001101011, 0b1110); #undef INSN1 #undef INSN2 @@ -1981,14 +1981,14 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \ } - INSN(v_eor, 0b101110001); - INSN(v_orr, 0b001110101); - INSN(v_and, 0b001110001); - INSN(v_bic, 0b001110011); - INSN(v_bif, 0b101110111); - INSN(v_bit, 0b101110101); - INSN(v_bsl, 0b101110011); - INSN(v_orn, 0b001110111); + INSN(eor, 0b101110001); + INSN(orr, 0b001110101); + INSN(andr, 0b001110001); + INSN(bic, 0b001110011); + INSN(bif, 0b101110111); + INSN(bit, 0b101110101); + INSN(bsl, 0b101110011); + INSN(orn, 0b001110111); #undef INSN @@ -1998,14 +1998,14 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \ } - INSN(v_aese, 0b0100111000101000010010); - INSN(v_aesd, 0b0100111000101000010110); - INSN(v_aesmc, 0b0100111000101000011010); - INSN(v_aesimc, 0b0100111000101000011110); + INSN(aese, 0b0100111000101000010010); + INSN(aesd, 0b0100111000101000010110); + INSN(aesmc, 0b0100111000101000011010); + INSN(aesimc, 0b0100111000101000011110); #undef INSN - void v_shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){ + void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){ starti; /* The encodings for the immh:immb fields (bits 22:16) are * 0001 xxx 8B/16B, shift = xxx @@ -2018,7 +2018,7 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0); } - void v_ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { starti; /* The encodings for the immh:immb fields (bits 22:16) are * 0001 xxx 8H, 8B/16b shift = xxx @@ -2031,22 +2031,22 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16); f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0); } - void v_ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { - v_ushll(Vd, Ta, Vn, Tb, shift); + void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + ushll(Vd, Ta, Vn, Tb, shift); } - void v_uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){ + void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T, int op = 0){ starti; f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21); rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0); } - void v_uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){ - v_uzp1(Vd, Vn, Vm, T, 1); + void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement T){ + uzp1(Vd, Vn, Vm, T, 1); } // Move from general purpose register // mov Vd.T[index], Rn - void v_mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) { + void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) { starti; f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0); @@ -2054,7 +2054,7 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, // Move to general purpose register // mov Rd, Vn.T[index] - void v_mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) { + void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) { starti; f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21); f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16); @@ -2062,17 +2062,17 @@ void v_ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, } // We do not handle the 1Q arrangement. - void v_pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { starti; assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier"); f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10); rf(Vn, 5), rf(Vd, 0); } - void v_pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { - v_pmull(Vd, Ta, Vn, Vm, Tb); + void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) { + pmull(Vd, Ta, Vn, Vm, Tb); } - void v_rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) + void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { starti; assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H"); diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp index 722e20632..5a91d5d4f 100644 --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp @@ -2239,131 +2239,131 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, if (UseNeon) { cmp(len, 64); br(Assembler::LT, L_by16); - v_eor(v16, T16B, v16, v16); + eor(v16, T16B, v16, v16); Label L_fold; add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants - v_ld1(v0, v1, T2D, buf, 32); - v_ld1r(v4, T2D, tmp, 8); - v_ld1r(v5, T2D, tmp, 8); - v_ld1r(v6, T2D, tmp, 8); - v_ld1r(v7, T2D, tmp, 8); - v_mov(v16, T4S, 0, crc); + ld1(v0, v1, T2D, buf, 32); + ld1r(v4, T2D, tmp, 8); + ld1r(v5, T2D, tmp, 8); + ld1r(v6, T2D, tmp, 8); + ld1r(v7, T2D, tmp, 8); + mov(v16, T4S, 0, crc); - v_eor(v0, T16B, v0, v16); + eor(v0, T16B, v0, v16); sub(len, len, 64); BIND(L_fold); - v_pmull(v22, T8H, v0, v5, T8B); - v_pmull(v20, T8H, v0, v7, T8B); - v_pmull(v23, T8H, v0, v4, T8B); - v_pmull(v21, T8H, v0, v6, T8B); - - v_pmull2(v18, T8H, v0, v5, T16B); - v_pmull2(v16, T8H, v0, v7, T16B); - v_pmull2(v19, T8H, v0, v4, T16B); - v_pmull2(v17, T8H, v0, v6, T16B); - - v_uzp1(v24, v20, v22, T8H); - v_uzp2(v25, v20, v22, T8H); - v_eor(v20, T16B, v24, v25); - - v_uzp1(v26, v16, v18, T8H); - v_uzp2(v27, v16, v18, T8H); - v_eor(v16, T16B, v26, v27); - - v_ushll2(v22, T4S, v20, T8H, 8); - v_ushll(v20, T4S, v20, T4H, 8); - - v_ushll2(v18, T4S, v16, T8H, 8); - v_ushll(v16, T4S, v16, T4H, 8); - - v_eor(v22, T16B, v23, v22); - v_eor(v18, T16B, v19, v18); - v_eor(v20, T16B, v21, v20); - v_eor(v16, T16B, v17, v16); - - v_uzp1(v17, v16, v20, T2D); - v_uzp2(v21, v16, v20, T2D); - v_eor(v17, T16B, v17, v21); - - v_ushll2(v20, T2D, v17, T4S, 16); - v_ushll(v16, T2D, v17, T2S, 16); - - v_eor(v20, T16B, v20, v22); - v_eor(v16, T16B, v16, v18); - - v_uzp1(v17, v20, v16, T2D); - v_uzp2(v21, v20, v16, T2D); - v_eor(v28, T16B, v17, v21); - - v_pmull(v22, T8H, v1, v5, T8B); - v_pmull(v20, T8H, v1, v7, T8B); - v_pmull(v23, T8H, v1, v4, T8B); - v_pmull(v21, T8H, v1, v6, T8B); - - v_pmull2(v18, T8H, v1, v5, T16B); - v_pmull2(v16, T8H, v1, v7, T16B); - v_pmull2(v19, T8H, v1, v4, T16B); - v_pmull2(v17, T8H, v1, v6, T16B); - - v_ld1(v0, v1, T2D, buf, 32); - - v_uzp1(v24, v20, v22, T8H); - v_uzp2(v25, v20, v22, T8H); - v_eor(v20, T16B, v24, v25); - - v_uzp1(v26, v16, v18, T8H); - v_uzp2(v27, v16, v18, T8H); - v_eor(v16, T16B, v26, v27); - - v_ushll2(v22, T4S, v20, T8H, 8); - v_ushll(v20, T4S, v20, T4H, 8); - - v_ushll2(v18, T4S, v16, T8H, 8); - v_ushll(v16, T4S, v16, T4H, 8); - - v_eor(v22, T16B, v23, v22); - v_eor(v18, T16B, v19, v18); - v_eor(v20, T16B, v21, v20); - v_eor(v16, T16B, v17, v16); - - v_uzp1(v17, v16, v20, T2D); - v_uzp2(v21, v16, v20, T2D); - v_eor(v16, T16B, v17, v21); - - v_ushll2(v20, T2D, v16, T4S, 16); - v_ushll(v16, T2D, v16, T2S, 16); - - v_eor(v20, T16B, v22, v20); - v_eor(v16, T16B, v16, v18); - - v_uzp1(v17, v20, v16, T2D); - v_uzp2(v21, v20, v16, T2D); - v_eor(v20, T16B, v17, v21); - - v_shl(v16, v28, T2D, 1); - v_shl(v17, v20, T2D, 1); - - v_eor(v0, T16B, v0, v16); - v_eor(v1, T16B, v1, v17); + pmull(v22, T8H, v0, v5, T8B); + pmull(v20, T8H, v0, v7, T8B); + pmull(v23, T8H, v0, v4, T8B); + pmull(v21, T8H, v0, v6, T8B); + + pmull2(v18, T8H, v0, v5, T16B); + pmull2(v16, T8H, v0, v7, T16B); + pmull2(v19, T8H, v0, v4, T16B); + pmull2(v17, T8H, v0, v6, T16B); + + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); + + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); + + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); + + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); + + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); + + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v17, T16B, v17, v21); + + ushll2(v20, T2D, v17, T4S, 16); + ushll(v16, T2D, v17, T2S, 16); + + eor(v20, T16B, v20, v22); + eor(v16, T16B, v16, v18); + + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v28, T16B, v17, v21); + + pmull(v22, T8H, v1, v5, T8B); + pmull(v20, T8H, v1, v7, T8B); + pmull(v23, T8H, v1, v4, T8B); + pmull(v21, T8H, v1, v6, T8B); + + pmull2(v18, T8H, v1, v5, T16B); + pmull2(v16, T8H, v1, v7, T16B); + pmull2(v19, T8H, v1, v4, T16B); + pmull2(v17, T8H, v1, v6, T16B); + + ld1(v0, v1, T2D, buf, 32); + + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); + + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); + + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); + + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); + + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); + + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v16, T16B, v17, v21); + + ushll2(v20, T2D, v16, T4S, 16); + ushll(v16, T2D, v16, T2S, 16); + + eor(v20, T16B, v22, v20); + eor(v16, T16B, v16, v18); + + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v20, T16B, v17, v21); + + shl(v16, v28, T2D, 1); + shl(v17, v20, T2D, 1); + + eor(v0, T16B, v0, v16); + eor(v1, T16B, v1, v17); subs(len, len, 32); br(Assembler::GE, L_fold); mov(crc, 0); - v_mov(tmp, v0, T1D, 0); + mov(tmp, v0, T1D, 0); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v0, T1D, 1); + mov(tmp, v0, T1D, 1); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v1, T1D, 0); + mov(tmp, v1, T1D, 0); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v1, T1D, 1); + mov(tmp, v1, T1D, 1); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); @@ -2773,7 +2773,7 @@ void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) decode_heap_oop_not_null(dst); } else { ldr(dst, src); - } + } } void MacroAssembler::store_heap_oop(Address dst, Register src) { diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index f8be280a9..5990e9f3c 100644 --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -37,6 +37,8 @@ class MacroAssembler: public Assembler { friend class LIR_Assembler; + using Assembler::mov; + protected: // Support for VM calls diff --git a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp index dc5d50795..7d5c3b61d 100644 --- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp @@ -1901,75 +1901,75 @@ class StubGenerator: public StubCodeGenerator { __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ v_ld1(v0, __ T16B, from); // get 16 bytes of input - - __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); - __ v_rev32(v3, __ T16B, v3); - __ v_rev32(v4, __ T16B, v4); - __ v_aese(v0, v1); - __ v_aesmc(v0, v0); - __ v_aese(v0, v2); - __ v_aesmc(v0, v0); - __ v_aese(v0, v3); - __ v_aesmc(v0, v0); - __ v_aese(v0, v4); - __ v_aesmc(v0, v0); - - __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); - __ v_rev32(v3, __ T16B, v3); - __ v_rev32(v4, __ T16B, v4); - __ v_aese(v0, v1); - __ v_aesmc(v0, v0); - __ v_aese(v0, v2); - __ v_aesmc(v0, v0); - __ v_aese(v0, v3); - __ v_aesmc(v0, v0); - __ v_aese(v0, v4); - __ v_aesmc(v0, v0); - - __ v_ld1(v1, v2, __ T16B, key, 32); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); + __ ld1(v0, __ T16B, from); // get 16 bytes of input + + __ ld1(v1, v2, v3, v4, __ T16B, key, 64); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + __ aese(v0, v3); + __ aesmc(v0, v0); + __ aese(v0, v4); + __ aesmc(v0, v0); + + __ ld1(v1, v2, v3, v4, __ T16B, key, 64); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); + __ aese(v0, v3); + __ aesmc(v0, v0); + __ aese(v0, v4); + __ aesmc(v0, v0); + + __ ld1(v1, v2, __ T16B, key, 32); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); __ cmpw(keylen, 44); __ br(Assembler::EQ, L_doLast); - __ v_aese(v0, v1); - __ v_aesmc(v0, v0); - __ v_aese(v0, v2); - __ v_aesmc(v0, v0); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); - __ v_ld1(v1, v2, __ T16B, key, 32); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); + __ ld1(v1, v2, __ T16B, key, 32); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); __ cmpw(keylen, 52); __ br(Assembler::EQ, L_doLast); - __ v_aese(v0, v1); - __ v_aesmc(v0, v0); - __ v_aese(v0, v2); - __ v_aesmc(v0, v0); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); + __ aesmc(v0, v0); - __ v_ld1(v1, v2, __ T16B, key, 32); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); + __ ld1(v1, v2, __ T16B, key, 32); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); __ BIND(L_doLast); - __ v_aese(v0, v1); - __ v_aesmc(v0, v0); - __ v_aese(v0, v2); + __ aese(v0, v1); + __ aesmc(v0, v0); + __ aese(v0, v2); - __ v_ld1(v1, __ T16B, key); - __ v_rev32(v1, __ T16B, v1); - __ v_eor(v0, __ T16B, v0, v1); + __ ld1(v1, __ T16B, key); + __ rev32(v1, __ T16B, v1); + __ eor(v0, __ T16B, v0, v1); - __ v_st1(v0, __ T16B, to); + __ st1(v0, __ T16B, to); __ mov(r0, 0); @@ -2002,76 +2002,76 @@ class StubGenerator: public StubCodeGenerator { __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ v_ld1(v0, __ T16B, from); // get 16 bytes of input - - __ v_ld1(v5, __ T16B, key, 16); - __ v_rev32(v5, __ T16B, v5); - - __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); - __ v_rev32(v3, __ T16B, v3); - __ v_rev32(v4, __ T16B, v4); - __ v_aesd(v0, v1); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v2); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v3); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v4); - __ v_aesimc(v0, v0); - - __ v_ld1(v1, v2, v3, v4, __ T16B, key, 64); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); - __ v_rev32(v3, __ T16B, v3); - __ v_rev32(v4, __ T16B, v4); - __ v_aesd(v0, v1); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v2); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v3); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v4); - __ v_aesimc(v0, v0); - - __ v_ld1(v1, v2, __ T16B, key, 32); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); + __ ld1(v0, __ T16B, from); // get 16 bytes of input + + __ ld1(v5, __ T16B, key, 16); + __ rev32(v5, __ T16B, v5); + + __ ld1(v1, v2, v3, v4, __ T16B, key, 64); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + __ aesd(v0, v3); + __ aesimc(v0, v0); + __ aesd(v0, v4); + __ aesimc(v0, v0); + + __ ld1(v1, v2, v3, v4, __ T16B, key, 64); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); + __ rev32(v3, __ T16B, v3); + __ rev32(v4, __ T16B, v4); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); + __ aesd(v0, v3); + __ aesimc(v0, v0); + __ aesd(v0, v4); + __ aesimc(v0, v0); + + __ ld1(v1, v2, __ T16B, key, 32); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); __ cmpw(keylen, 44); __ br(Assembler::EQ, L_doLast); - __ v_aesd(v0, v1); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v2); - __ v_aesimc(v0, v0); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); - __ v_ld1(v1, v2, __ T16B, key, 32); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); + __ ld1(v1, v2, __ T16B, key, 32); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); __ cmpw(keylen, 52); __ br(Assembler::EQ, L_doLast); - __ v_aesd(v0, v1); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v2); - __ v_aesimc(v0, v0); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); + __ aesimc(v0, v0); - __ v_ld1(v1, v2, __ T16B, key, 32); - __ v_rev32(v1, __ T16B, v1); - __ v_rev32(v2, __ T16B, v2); + __ ld1(v1, v2, __ T16B, key, 32); + __ rev32(v1, __ T16B, v1); + __ rev32(v2, __ T16B, v2); __ BIND(L_doLast); - __ v_aesd(v0, v1); - __ v_aesimc(v0, v0); - __ v_aesd(v0, v2); + __ aesd(v0, v1); + __ aesimc(v0, v0); + __ aesd(v0, v2); - __ v_eor(v0, __ T16B, v0, v5); + __ eor(v0, __ T16B, v0, v5); - __ v_st1(v0, __ T16B, to); + __ st1(v0, __ T16B, to); __ mov(r0, 0); @@ -2114,65 +2114,65 @@ class StubGenerator: public StubCodeGenerator { __ mov(rscratch1, len_reg); __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ v_ld1(v0, __ T16B, rvec); + __ ld1(v0, __ T16B, rvec); __ cmpw(keylen, 52); __ br(Assembler::CC, L_loadkeys_44); __ br(Assembler::EQ, L_loadkeys_52); - __ v_ld1(v17, v18, __ T16B, key, 32); - __ v_rev32(v17, __ T16B, v17); - __ v_rev32(v18, __ T16B, v18); + __ ld1(v17, v18, __ T16B, key, 32); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); __ BIND(L_loadkeys_52); - __ v_ld1(v19, v20, __ T16B, key, 32); - __ v_rev32(v19, __ T16B, v19); - __ v_rev32(v20, __ T16B, v20); + __ ld1(v19, v20, __ T16B, key, 32); + __ rev32(v19, __ T16B, v19); + __ rev32(v20, __ T16B, v20); __ BIND(L_loadkeys_44); - __ v_ld1(v21, v22, v23, v24, __ T16B, key, 64); - __ v_rev32(v21, __ T16B, v21); - __ v_rev32(v22, __ T16B, v22); - __ v_rev32(v23, __ T16B, v23); - __ v_rev32(v24, __ T16B, v24); - __ v_ld1(v25, v26, v27, v28, __ T16B, key, 64); - __ v_rev32(v25, __ T16B, v25); - __ v_rev32(v26, __ T16B, v26); - __ v_rev32(v27, __ T16B, v27); - __ v_rev32(v28, __ T16B, v28); - __ v_ld1(v29, v30, v31, __ T16B, key); - __ v_rev32(v29, __ T16B, v29); - __ v_rev32(v30, __ T16B, v30); - __ v_rev32(v31, __ T16B, v31); + __ ld1(v21, v22, v23, v24, __ T16B, key, 64); + __ rev32(v21, __ T16B, v21); + __ rev32(v22, __ T16B, v22); + __ rev32(v23, __ T16B, v23); + __ rev32(v24, __ T16B, v24); + __ ld1(v25, v26, v27, v28, __ T16B, key, 64); + __ rev32(v25, __ T16B, v25); + __ rev32(v26, __ T16B, v26); + __ rev32(v27, __ T16B, v27); + __ rev32(v28, __ T16B, v28); + __ ld1(v29, v30, v31, __ T16B, key); + __ rev32(v29, __ T16B, v29); + __ rev32(v30, __ T16B, v30); + __ rev32(v31, __ T16B, v31); __ BIND(L_aes_loop); - __ v_ld1(v1, __ T16B, from, 16); - __ v_eor(v0, __ T16B, v0, v1); + __ ld1(v1, __ T16B, from, 16); + __ eor(v0, __ T16B, v0, v1); __ br(Assembler::CC, L_rounds_44); __ br(Assembler::EQ, L_rounds_52); - __ v_aese(v0, v17); __ v_aesmc(v0, v0); - __ v_aese(v0, v18); __ v_aesmc(v0, v0); + __ aese(v0, v17); __ aesmc(v0, v0); + __ aese(v0, v18); __ aesmc(v0, v0); __ BIND(L_rounds_52); - __ v_aese(v0, v19); __ v_aesmc(v0, v0); - __ v_aese(v0, v20); __ v_aesmc(v0, v0); + __ aese(v0, v19); __ aesmc(v0, v0); + __ aese(v0, v20); __ aesmc(v0, v0); __ BIND(L_rounds_44); - __ v_aese(v0, v21); __ v_aesmc(v0, v0); - __ v_aese(v0, v22); __ v_aesmc(v0, v0); - __ v_aese(v0, v23); __ v_aesmc(v0, v0); - __ v_aese(v0, v24); __ v_aesmc(v0, v0); - __ v_aese(v0, v25); __ v_aesmc(v0, v0); - __ v_aese(v0, v26); __ v_aesmc(v0, v0); - __ v_aese(v0, v27); __ v_aesmc(v0, v0); - __ v_aese(v0, v28); __ v_aesmc(v0, v0); - __ v_aese(v0, v29); __ v_aesmc(v0, v0); - __ v_aese(v0, v30); - __ v_eor(v0, __ T16B, v0, v31); - - __ v_st1(v0, __ T16B, to, 16); + __ aese(v0, v21); __ aesmc(v0, v0); + __ aese(v0, v22); __ aesmc(v0, v0); + __ aese(v0, v23); __ aesmc(v0, v0); + __ aese(v0, v24); __ aesmc(v0, v0); + __ aese(v0, v25); __ aesmc(v0, v0); + __ aese(v0, v26); __ aesmc(v0, v0); + __ aese(v0, v27); __ aesmc(v0, v0); + __ aese(v0, v28); __ aesmc(v0, v0); + __ aese(v0, v29); __ aesmc(v0, v0); + __ aese(v0, v30); + __ eor(v0, __ T16B, v0, v31); + + __ st1(v0, __ T16B, to, 16); __ sub(len_reg, len_reg, 16); __ cbnz(len_reg, L_aes_loop); - __ v_st1(v0, __ T16B, rvec); + __ st1(v0, __ T16B, rvec); __ mov(r0, rscratch2); @@ -2213,70 +2213,70 @@ class StubGenerator: public StubCodeGenerator { __ mov(rscratch2, len_reg); __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ v_ld1(v2, __ T16B, rvec); + __ ld1(v2, __ T16B, rvec); - __ v_ld1(v31, __ T16B, key, 16); - __ v_rev32(v31, __ T16B, v31); + __ ld1(v31, __ T16B, key, 16); + __ rev32(v31, __ T16B, v31); __ cmpw(keylen, 52); __ br(Assembler::CC, L_loadkeys_44); __ br(Assembler::EQ, L_loadkeys_52); - __ v_ld1(v17, v18, __ T16B, key, 32); - __ v_rev32(v17, __ T16B, v17); - __ v_rev32(v18, __ T16B, v18); + __ ld1(v17, v18, __ T16B, key, 32); + __ rev32(v17, __ T16B, v17); + __ rev32(v18, __ T16B, v18); __ BIND(L_loadkeys_52); - __ v_ld1(v19, v20, __ T16B, key, 32); - __ v_rev32(v19, __ T16B, v19); - __ v_rev32(v20, __ T16B, v20); + __ ld1(v19, v20, __ T16B, key, 32); + __ rev32(v19, __ T16B, v19); + __ rev32(v20, __ T16B, v20); __ BIND(L_loadkeys_44); - __ v_ld1(v21, v22, v23, v24, __ T16B, key, 64); - __ v_rev32(v21, __ T16B, v21); - __ v_rev32(v22, __ T16B, v22); - __ v_rev32(v23, __ T16B, v23); - __ v_rev32(v24, __ T16B, v24); - __ v_ld1(v25, v26, v27, v28, __ T16B, key, 64); - __ v_rev32(v25, __ T16B, v25); - __ v_rev32(v26, __ T16B, v26); - __ v_rev32(v27, __ T16B, v27); - __ v_rev32(v28, __ T16B, v28); - __ v_ld1(v29, v30, __ T16B, key); - __ v_rev32(v29, __ T16B, v29); - __ v_rev32(v30, __ T16B, v30); + __ ld1(v21, v22, v23, v24, __ T16B, key, 64); + __ rev32(v21, __ T16B, v21); + __ rev32(v22, __ T16B, v22); + __ rev32(v23, __ T16B, v23); + __ rev32(v24, __ T16B, v24); + __ ld1(v25, v26, v27, v28, __ T16B, key, 64); + __ rev32(v25, __ T16B, v25); + __ rev32(v26, __ T16B, v26); + __ rev32(v27, __ T16B, v27); + __ rev32(v28, __ T16B, v28); + __ ld1(v29, v30, __ T16B, key); + __ rev32(v29, __ T16B, v29); + __ rev32(v30, __ T16B, v30); __ BIND(L_aes_loop); - __ v_ld1(v0, __ T16B, from, 16); - __ v_orr(v1, __ T16B, v0, v0); + __ ld1(v0, __ T16B, from, 16); + __ orr(v1, __ T16B, v0, v0); __ br(Assembler::CC, L_rounds_44); __ br(Assembler::EQ, L_rounds_52); - __ v_aesd(v0, v17); __ v_aesimc(v0, v0); - __ v_aesd(v0, v17); __ v_aesimc(v0, v0); + __ aesd(v0, v17); __ aesimc(v0, v0); + __ aesd(v0, v17); __ aesimc(v0, v0); __ BIND(L_rounds_52); - __ v_aesd(v0, v19); __ v_aesimc(v0, v0); - __ v_aesd(v0, v20); __ v_aesimc(v0, v0); + __ aesd(v0, v19); __ aesimc(v0, v0); + __ aesd(v0, v20); __ aesimc(v0, v0); __ BIND(L_rounds_44); - __ v_aesd(v0, v21); __ v_aesimc(v0, v0); - __ v_aesd(v0, v22); __ v_aesimc(v0, v0); - __ v_aesd(v0, v23); __ v_aesimc(v0, v0); - __ v_aesd(v0, v24); __ v_aesimc(v0, v0); - __ v_aesd(v0, v25); __ v_aesimc(v0, v0); - __ v_aesd(v0, v26); __ v_aesimc(v0, v0); - __ v_aesd(v0, v27); __ v_aesimc(v0, v0); - __ v_aesd(v0, v28); __ v_aesimc(v0, v0); - __ v_aesd(v0, v29); __ v_aesimc(v0, v0); - __ v_aesd(v0, v30); - __ v_eor(v0, __ T16B, v0, v31); - __ v_eor(v0, __ T16B, v0, v2); - - __ v_st1(v0, __ T16B, to, 16); - __ v_orr(v2, __ T16B, v1, v1); + __ aesd(v0, v21); __ aesimc(v0, v0); + __ aesd(v0, v22); __ aesimc(v0, v0); + __ aesd(v0, v23); __ aesimc(v0, v0); + __ aesd(v0, v24); __ aesimc(v0, v0); + __ aesd(v0, v25); __ aesimc(v0, v0); + __ aesd(v0, v26); __ aesimc(v0, v0); + __ aesd(v0, v27); __ aesimc(v0, v0); + __ aesd(v0, v28); __ aesimc(v0, v0); + __ aesd(v0, v29); __ aesimc(v0, v0); + __ aesd(v0, v30); + __ eor(v0, __ T16B, v0, v31); + __ eor(v0, __ T16B, v0, v2); + + __ st1(v0, __ T16B, to, 16); + __ orr(v2, __ T16B, v1, v1); __ sub(len_reg, len_reg, 16); __ cbnz(len_reg, L_aes_loop); - __ v_st1(v2, __ T16B, rvec); + __ st1(v2, __ T16B, rvec); __ mov(r0, rscratch2); |