diff options
Diffstat (limited to 'src/cpu/aarch64/vm/macroAssembler_aarch64.cpp')
-rw-r--r-- | src/cpu/aarch64/vm/macroAssembler_aarch64.cpp | 212 |
1 files changed, 106 insertions, 106 deletions
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp index 722e20632..5a91d5d4f 100644 --- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp @@ -2239,131 +2239,131 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, if (UseNeon) { cmp(len, 64); br(Assembler::LT, L_by16); - v_eor(v16, T16B, v16, v16); + eor(v16, T16B, v16, v16); Label L_fold; add(tmp, table0, 4*256*sizeof(juint)); // Point at the Neon constants - v_ld1(v0, v1, T2D, buf, 32); - v_ld1r(v4, T2D, tmp, 8); - v_ld1r(v5, T2D, tmp, 8); - v_ld1r(v6, T2D, tmp, 8); - v_ld1r(v7, T2D, tmp, 8); - v_mov(v16, T4S, 0, crc); + ld1(v0, v1, T2D, buf, 32); + ld1r(v4, T2D, tmp, 8); + ld1r(v5, T2D, tmp, 8); + ld1r(v6, T2D, tmp, 8); + ld1r(v7, T2D, tmp, 8); + mov(v16, T4S, 0, crc); - v_eor(v0, T16B, v0, v16); + eor(v0, T16B, v0, v16); sub(len, len, 64); BIND(L_fold); - v_pmull(v22, T8H, v0, v5, T8B); - v_pmull(v20, T8H, v0, v7, T8B); - v_pmull(v23, T8H, v0, v4, T8B); - v_pmull(v21, T8H, v0, v6, T8B); - - v_pmull2(v18, T8H, v0, v5, T16B); - v_pmull2(v16, T8H, v0, v7, T16B); - v_pmull2(v19, T8H, v0, v4, T16B); - v_pmull2(v17, T8H, v0, v6, T16B); - - v_uzp1(v24, v20, v22, T8H); - v_uzp2(v25, v20, v22, T8H); - v_eor(v20, T16B, v24, v25); - - v_uzp1(v26, v16, v18, T8H); - v_uzp2(v27, v16, v18, T8H); - v_eor(v16, T16B, v26, v27); - - v_ushll2(v22, T4S, v20, T8H, 8); - v_ushll(v20, T4S, v20, T4H, 8); - - v_ushll2(v18, T4S, v16, T8H, 8); - v_ushll(v16, T4S, v16, T4H, 8); - - v_eor(v22, T16B, v23, v22); - v_eor(v18, T16B, v19, v18); - v_eor(v20, T16B, v21, v20); - v_eor(v16, T16B, v17, v16); - - v_uzp1(v17, v16, v20, T2D); - v_uzp2(v21, v16, v20, T2D); - v_eor(v17, T16B, v17, v21); - - v_ushll2(v20, T2D, v17, T4S, 16); - v_ushll(v16, T2D, v17, T2S, 16); - - v_eor(v20, T16B, v20, v22); - v_eor(v16, T16B, v16, v18); - - v_uzp1(v17, v20, v16, T2D); - v_uzp2(v21, v20, v16, T2D); - v_eor(v28, T16B, v17, v21); - - v_pmull(v22, T8H, v1, v5, T8B); - v_pmull(v20, T8H, v1, v7, T8B); - v_pmull(v23, T8H, v1, v4, T8B); - v_pmull(v21, T8H, v1, v6, T8B); - - v_pmull2(v18, T8H, v1, v5, T16B); - v_pmull2(v16, T8H, v1, v7, T16B); - v_pmull2(v19, T8H, v1, v4, T16B); - v_pmull2(v17, T8H, v1, v6, T16B); - - v_ld1(v0, v1, T2D, buf, 32); - - v_uzp1(v24, v20, v22, T8H); - v_uzp2(v25, v20, v22, T8H); - v_eor(v20, T16B, v24, v25); - - v_uzp1(v26, v16, v18, T8H); - v_uzp2(v27, v16, v18, T8H); - v_eor(v16, T16B, v26, v27); - - v_ushll2(v22, T4S, v20, T8H, 8); - v_ushll(v20, T4S, v20, T4H, 8); - - v_ushll2(v18, T4S, v16, T8H, 8); - v_ushll(v16, T4S, v16, T4H, 8); - - v_eor(v22, T16B, v23, v22); - v_eor(v18, T16B, v19, v18); - v_eor(v20, T16B, v21, v20); - v_eor(v16, T16B, v17, v16); - - v_uzp1(v17, v16, v20, T2D); - v_uzp2(v21, v16, v20, T2D); - v_eor(v16, T16B, v17, v21); - - v_ushll2(v20, T2D, v16, T4S, 16); - v_ushll(v16, T2D, v16, T2S, 16); - - v_eor(v20, T16B, v22, v20); - v_eor(v16, T16B, v16, v18); - - v_uzp1(v17, v20, v16, T2D); - v_uzp2(v21, v20, v16, T2D); - v_eor(v20, T16B, v17, v21); - - v_shl(v16, v28, T2D, 1); - v_shl(v17, v20, T2D, 1); - - v_eor(v0, T16B, v0, v16); - v_eor(v1, T16B, v1, v17); + pmull(v22, T8H, v0, v5, T8B); + pmull(v20, T8H, v0, v7, T8B); + pmull(v23, T8H, v0, v4, T8B); + pmull(v21, T8H, v0, v6, T8B); + + pmull2(v18, T8H, v0, v5, T16B); + pmull2(v16, T8H, v0, v7, T16B); + pmull2(v19, T8H, v0, v4, T16B); + pmull2(v17, T8H, v0, v6, T16B); + + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); + + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); + + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); + + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); + + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); + + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v17, T16B, v17, v21); + + ushll2(v20, T2D, v17, T4S, 16); + ushll(v16, T2D, v17, T2S, 16); + + eor(v20, T16B, v20, v22); + eor(v16, T16B, v16, v18); + + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v28, T16B, v17, v21); + + pmull(v22, T8H, v1, v5, T8B); + pmull(v20, T8H, v1, v7, T8B); + pmull(v23, T8H, v1, v4, T8B); + pmull(v21, T8H, v1, v6, T8B); + + pmull2(v18, T8H, v1, v5, T16B); + pmull2(v16, T8H, v1, v7, T16B); + pmull2(v19, T8H, v1, v4, T16B); + pmull2(v17, T8H, v1, v6, T16B); + + ld1(v0, v1, T2D, buf, 32); + + uzp1(v24, v20, v22, T8H); + uzp2(v25, v20, v22, T8H); + eor(v20, T16B, v24, v25); + + uzp1(v26, v16, v18, T8H); + uzp2(v27, v16, v18, T8H); + eor(v16, T16B, v26, v27); + + ushll2(v22, T4S, v20, T8H, 8); + ushll(v20, T4S, v20, T4H, 8); + + ushll2(v18, T4S, v16, T8H, 8); + ushll(v16, T4S, v16, T4H, 8); + + eor(v22, T16B, v23, v22); + eor(v18, T16B, v19, v18); + eor(v20, T16B, v21, v20); + eor(v16, T16B, v17, v16); + + uzp1(v17, v16, v20, T2D); + uzp2(v21, v16, v20, T2D); + eor(v16, T16B, v17, v21); + + ushll2(v20, T2D, v16, T4S, 16); + ushll(v16, T2D, v16, T2S, 16); + + eor(v20, T16B, v22, v20); + eor(v16, T16B, v16, v18); + + uzp1(v17, v20, v16, T2D); + uzp2(v21, v20, v16, T2D); + eor(v20, T16B, v17, v21); + + shl(v16, v28, T2D, 1); + shl(v17, v20, T2D, 1); + + eor(v0, T16B, v0, v16); + eor(v1, T16B, v1, v17); subs(len, len, 32); br(Assembler::GE, L_fold); mov(crc, 0); - v_mov(tmp, v0, T1D, 0); + mov(tmp, v0, T1D, 0); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v0, T1D, 1); + mov(tmp, v0, T1D, 1); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v1, T1D, 0); + mov(tmp, v1, T1D, 0); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); - v_mov(tmp, v1, T1D, 1); + mov(tmp, v1, T1D, 1); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, false); update_word_crc32(crc, tmp, tmp2, table0, table1, table2, table3, true); @@ -2773,7 +2773,7 @@ void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) decode_heap_oop_not_null(dst); } else { ldr(dst, src); - } + } } void MacroAssembler::store_heap_oop(Address dst, Register src) { |