############################################################################### # Copyright (c) 2010 Linaro Limited # All rights reserved. This program and the accompanying materials # are made available under the terms of the Eclipse Public License v1.0 # which accompanies this distribution, and is available at # http://www.eclipse.org/legal/epl-v10.html # # Contributors: # Peter Maydell (Linaro) - initial implementation ############################################################################### # Input file for risugen defining ARM instructions .mode arm # Some random patterns #ADD A1 cond:4 0000 100 s rn:4 rd:4 imm:5 type:2 0 rm:4 #RBIT A1 cond:4 0110 1111 1111 rd:4 1111 0011 rm:4 # various 32x32->64 multiplies # we omit the v5-and-below constraint that rn must not be rdhi or rdlo UMAAL A1 cond:4 0000 0100 rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; } UMLAL A1 cond:4 0000 101 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; } UMULL A1 cond:4 0000 100 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; } SMLAL A1 cond:4 0000 111 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; } SMULL A1 cond:4 0000 110 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; } # 32x32->64 but result is high word only SMMLA A1 cond:4 01110101 rd:4 ra:4 rm:4 00 r 1 rn:4 SMMLS A1 cond:4 01110101 rd:4 ra:4 rm:4 11 r 1 rn:4 # Note that this doesn't overlap with SMMLA because of the implicit # constraints on registers fields (ie not 13 or 15) SMMUL A1 cond:4 01110101 rd:4 1111 rm:4 00 r 1 rn:4 # dual multiplies SMLAD A1 cond:4 0111 0000 rd:4 ra:4 rm:4 00 m 1 rn:4 SMUAD A1 cond:4 0111 0000 rd:4 1111 rm:4 00 m 1 rn:4 SMLSD A1 cond:4 0111 0000 rd:4 ra:4 rm:4 01 m 1 rn:4 SMUSD A1 cond:4 0111 0000 rd:4 1111 rm:4 01 m 1 rn:4 SMLALD A1 cond:4 0111 0100 rdhi:4 rdlo:4 rm:4 00 m 1 rn:4 { $rdhi != $rdlo; } SMLSLD A1 cond:4 0111 0100 rdhi:4 rdlo:4 rm:4 01 m 1 rn:4 { $rdhi != $rdlo; } # divide (A15 and above only!) 
SDIV A1 cond:4 01110 001 rd:4 1111 rm:4 000 1 rn:4 UDIV A1 cond:4 01110 011 rd:4 1111 rm:4 000 1 rn:4 USAT A1 cond:4 0110111 satimm:5 rd:4 imm:5 sh 0 1 rn:4 SSAT A1 cond:4 0110101 satimm:5 rd:4 imm:5 sh 0 1 rn:4 SSAT16 A1 cond:4 01101010 satimm:4 rd:4 1111 0011 rn:4 USAT16 A1 cond:4 01101110 satimm:4 rd:4 1111 0011 rn:4 # various preload and hint instructions # see table A5-24 for this unallocated hint insn block (must NOP on v7MP) UNALLOC_HINT A1 11110 100 x 001 anything:20 UNALLOC_HINT_b A1 11110 110 x 001 anything:15 0 any:4 PLI_imm A1 1111 0100 u 101 rn:4 1111 imm:12 PLI_reg A1 1111 0110 u 101 rn:4 1111 imm:5 type:2 0 rm:4 PLD_imm A1 1111 0101 u 101 rn:4 1111 imm:12 PLD_reg A1 1111 0111 u 101 rn:4 1111 imm:5 type:2 0 rm:4 PLDW_imm A1 1111 0101 u 001 rn:4 1111 imm:12 PLDW_reg A1 1111 0111 u 001 rn:4 1111 imm:5 type:2 0 rm:4 # no overlap with PLD_imm because rn can't be 15 PLD_lit A1 1111 0101 u 101 1111 1111 imm:12 # Unsigned saturating add/subtract # UQADD16, UQSUB16, UQADD8, UQSUB8 UQADD16 A1 cond:4 01100110 rn:4 rd:4 1111 0001 rm:4 UQADD8 A1 cond:4 01100110 rn:4 rd:4 1111 1001 rm:4 UQSUB16 A1 cond:4 01100110 rn:4 rd:4 1111 0111 rm:4 UQSUB8 A1 cond:4 01100110 rn:4 rd:4 1111 1111 rm:4 # Signed ditto QADD16 A1 cond:4 01100010 rn:4 rd:4 1111 0001 rm:4 QADD8 A1 cond:4 01100010 rn:4 rd:4 1111 1001 rm:4 QSUB16 A1 cond:4 01100010 rn:4 rd:4 1111 0111 rm:4 QSUB8 A1 cond:4 01100010 rn:4 rd:4 1111 1111 rm:4 # Signed parallel add/subtract SADD8 A1 cond:4 0110 0001 rn:4 rd:4 1111 1001 rm:4 SADD16 A1 cond:4 0110 0001 rn:4 rd:4 1111 0001 rm:4 SSUB8 A1 cond:4 0110 0001 rn:4 rd:4 1111 1111 rm:4 SSUB16 A1 cond:4 0110 0001 rn:4 rd:4 1111 0111 rm:4 # unsigned ditto UADD8 A1 cond:4 0110 0101 rn:4 rd:4 1111 1001 rm:4 UADD16 A1 cond:4 0110 0101 rn:4 rd:4 1111 0001 rm:4 USUB8 A1 cond:4 0110 0101 rn:4 rd:4 1111 1111 rm:4 USUB16 A1 cond:4 0110 0101 rn:4 rd:4 1111 0111 rm:4 # UNDEF cases: op1 == 0 or op2 == 101 or 110 SADD_UNDEF A1a cond:4 011000 00 any:12 op2:3 1 any2:4 SADD_UNDEF A1b 
cond:4 011000 op1:2 any:12 101 1 any2:4 SADD_UNDEF A1c cond:4 011000 op1:2 any:12 110 1 any2:4 SASX A1 cond:4 0110 0001 rn:4 rd:4 1111 0011 rm:4 SSAX A1 cond:4 0110 0001 rn:4 rd:4 1111 0101 rm:4 # SUBS PC, LR: these are actually unpredictable... #SUBS_PC_LR A1 cond:4 001 0010 1 rn:4 1111 imm:12 #MOVS_PC_LR A1 cond:4 001 1101 1 rn:4 1111 imm:12 # MLS - v6T2 and later only MLS A1 cond:4 00000110 rd:4 ra:4 rm:4 1001 rn:4 REVSH A1 cond:4 01101 111 1111 rd:4 1111 1011 rm:4 # ADC/SBC various forms ADC_imm A1 cond:4 0010101 s:1 rn:4 rd:4 imm:12 ADC_reg A1 cond:4 0000101 s:1 rn:4 rd:4 imm:5 type:2 0 rm:4 ADC_rsr A1 cond:4 0000101 s:1 rn:4 rd:4 rs:4 0 type:2 1 rm:4 SBC_imm A1 cond:4 0010110 s:1 rn:4 rd:4 imm:12 SBC_reg A1 cond:4 0000110 s:1 rn:4 rd:4 imm:5 type:2 0 rm:4 SBC_rsr A1 cond:4 0000110 s:1 rn:4 rd:4 rs:4 0 type:2 1 rm:4 ########### Neon loads and stores ######################### # These patterns cover all the Neon element/structure # load store insns, ie the whole of the space in section # A7.7 of the ARM ARM DDI0406B, including the UNDEF space. # This is all of VLD1,VLD2,VLD3,VLD4,VST1,VST2,VST3,VST4. ########################################################### # VLD*, single element to all lanes # All addressing modes (reg, reg postindex reg, reg postindex eltsz) # Note use of 'xm' for 'rm' to avoid the implicit "not 13 or 15" # constraint -- 13 and 15 encode the other two addr modes so are OK here. # The constraints are avoiding: d+regs > 32 (UNPREDICTABLE); # also the UNDEF sz/a combinations; and the risugen restriction # that the two regs in reg postindex reg must be different. # Max alignment requirement for VLD1 is 4 bytes. 
# VLD*, single element to all lanes.
VLD1_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 00 sz:2 t a xm:4 \
        !constraints { ($d == 0 || $t == 0 || $vd != 0xf) && $sz != 3 && ($sz != 0 || $a != 1) && ($rn != $xm); } \
        !memory { reg($rn); }
# As usual we need to separate out the UNDEF cases as they
# must not have !memory blocks
# sz 11: UNDEF
VLD1_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 00 11 t a rm:4
# sz 00, a 1 : UNDEF
VLD1_stoa A1c 1111 0100 1 d 10 rn:4 vd:4 11 00 00 t 1 rm:4
# VLD2: d+t+1 > 31 is unpredictable
VLD2_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 01 sz:2 t a xm:4 \
        !constraints { (((($d << 4)|$vd) + $t + 1) < 32) && $sz != 3 && ($rn != $xm); } \
        !memory { align(8); reg($rn); }
# UNDEF case : sz 11
VLD2_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 01 11 t a xm:4
# VLD3: d+(t+1)*2 > 31 is unpredictable
# BUGFIX: the n-1 field (bits [9:8]) was 01, which is VLD2-to-all-lanes and
# collided with VLD2_stoa A1a; VLD3 must use 10 (matching its own UNDEF
# companions A1b/A1c below).
VLD3_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 10 sz:2 t 0 xm:4 \
        !constraints { (((($d << 4)|$vd) + ($t + 1)*2) < 32) && $sz != 3 && ($rn != $xm); } \
        !memory { reg($rn); }
# UNDEF case : sz 11 or a 1
VLD3_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 10 11 t a xm:4
VLD3_stoa A1c 1111 0100 1 d 10 rn:4 vd:4 11 10 sz:2 t 1 xm:4
# VLD4: d+(t+1)*3 > 31 is unpredictable
VLD4_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 11 sz:2 t a xm:4 \
        !constraints { (((($d << 4)|$vd) + ($t + 1)*3) < 32) && ($sz != 3 || $a != 0) && ($rn != $xm); } \
        !memory { align(16); reg($rn); }
# UNDEF case : sz 11 and a 0
VLD4_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 11 11 t 0 xm:4
# VLD*, single element to one lane. We split the sz cases out
# for convenience of filtering the UNDEF cases and the VLD1-to-all-lanes
# sz == 00
VLD1_s A1a 1111 0100 1 d 10 rn:4 vd:4 00 00 idx:3 0 xm:4 \
        !constraints { ($rn != $xm); } \
        !memory { reg($rn); }
# sz == 01
VLD1_s A1b 1111 0100 1 d 10 rn:4 vd:4 01 00 idx:2 0 idx0 xm:4 \
        !constraints { ($rn != $xm); } \
        !memory { reg($rn); }
# sz == 10
VLD1_s A1c 1111 0100 1 d 10 rn:4 vd:4 10 00 idx3 0 idx:2 xm:4 \
        !constraints { ($rn != $xm) && ($idx == 0 || $idx == 3); } \
        !memory { reg($rn); }
# UNDEF cases: bad index fields for each size
VLD1_s A1d 1111 0100 1 d 10 rn:4 vd:4 00 00 idx:3 1 xm:4
VLD1_s A1e 1111 0100 1 d 10 rn:4 vd:4 01 00 idx:2 1 idx0 xm:4
# BUGFIX: '($idx & 4) == 1' could never be true ($idx & 4 is 0 or 4), so the
# index_align<2> == 1 UNDEF case was never generated; compare against 4.
VLD1_s A1f 1111 0100 1 d 10 rn:4 vd:4 10 00 idx:4 xm:4 \
        !constraints { ($idx & 4) == 4 || ($idx & 3) == 1 || ($idx & 3) == 2; }
# VLD2 has an UNPREDICTABLE case for d+inc > 31.
# sz == 00, 01 (no UNDEF cases)
VLD2_s A1a 1111 0100 1 d 10 rn:4 vd:4 0 sz 01 idx:4 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + (($idx >> $sz) & 1)) < 32; } \
        !memory { reg($rn); }
# sz == 10
VLD2_s A1b 1111 0100 1 d 10 rn:4 vd:4 10 01 idx:2 0 idx0 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + ($idx & 1)) < 32; } \
        !memory { align(8); reg($rn); }
# only UNDEF case is sz=10, idx<1>=1
VLD2_s A1c 1111 0100 1 d 10 rn:4 vd:4 10 01 idx:2 1 idx0 xm:4
# UNPREDICTABLE here is for d+inc+inc > 31
# sz == 00, 01
VLD3_s A1a 1111 0100 1 d 10 rn:4 vd:4 0 sz 10 idx:3 0 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ((($idx << 1) >> $sz) & 1))) < 32; } \
        !memory { reg($rn); }
# sz == 10
VLD3_s A1b 1111 0100 1 d 10 rn:4 vd:4 10 10 idx:2 00 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ($idx & 1))) < 32; } \
        !memory { reg($rn); }
# UNDEF: sz == 00, 01, idx<0> != 0
VLD3_s A1c 1111 0100 1 d 10 rn:4 vd:4 0 sz 10 idx:3 1 xm:4
# UNDEF: sz == 10, idx<1:0> != 00
VLD3_s A1d 1111 0100 1 d 10 rn:4 vd:4 10 10 idx:4 xm:4 \
        !constraints { ($idx & 3) != 0; }
# VLD4 has an UNPREDICTABLE
# case for d+3*inc > 31.
# sz == 00, 01 (no UNDEF cases)
VLD4_s A1a 1111 0100 1 d 10 rn:4 vd:4 0 sz 11 idx:4 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + (($idx & (1 << $sz)) >> 1))) < 32; } \
        !memory { align(8); reg($rn); }
# sz == 10
# BUGFIX: the n-1 field (bits [9:8]) was 01, i.e. VLD2 -- byte-identical to
# VLD2_s A1b; VLD4 must encode 11 (matching VLD4_s A1a above).
VLD4_s A1b 1111 0100 1 d 10 rn:4 vd:4 10 11 idx:2 0 idx0 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + ($idx & 1))) < 32; } \
        !memory { align(16); reg($rn); }
# only UNDEF case is sz=10, idx<1:0>==11
# BUGFIX: same 01 -> 11 correction as VLD4_s A1b.
VLD4_s A1c 1111 0100 1 d 10 rn:4 vd:4 10 11 idx:2 11 xm:4
# VST* single element from one lane
# These are actually identical to the VLD* patterns except that
# bit 21 is clear to indicate store rather than load.
# sz == 00
VST1_s A1a 1111 0100 1 d 00 rn:4 vd:4 00 00 idx:3 0 xm:4 \
        !constraints { ($rn != $xm); } \
        !memory { reg($rn); }
# sz == 01
VST1_s A1b 1111 0100 1 d 00 rn:4 vd:4 01 00 idx:2 0 idx0 xm:4 \
        !constraints { ($rn != $xm); } \
        !memory { reg($rn); }
# sz == 10
VST1_s A1c 1111 0100 1 d 00 rn:4 vd:4 10 00 idx3 0 idx:2 xm:4 \
        !constraints { ($rn != $xm) && ($idx == 0 || $idx == 3); } \
        !memory { reg($rn); }
# UNDEF cases: bad index fields for each size
VST1_s A1d 1111 0100 1 d 00 rn:4 vd:4 00 00 idx:3 1 xm:4
VST1_s A1e 1111 0100 1 d 00 rn:4 vd:4 01 00 idx:2 1 idx0 xm:4
# BUGFIX: '($idx & 4) == 1' was always false (same bug as VLD1_s A1f);
# compare against 4 so the index_align<2> == 1 UNDEF case is generated.
VST1_s A1f 1111 0100 1 d 00 rn:4 vd:4 10 00 idx:4 xm:4 \
        !constraints { ($idx & 4) == 4 || ($idx & 3) == 1 || ($idx & 3) == 2; }
# sz == 00, 01 (no UNDEF cases)
VST2_s A1a 1111 0100 1 d 00 rn:4 vd:4 0 sz 01 idx:4 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + (($idx >> $sz) & 1)) < 32; } \
        !memory { reg($rn); }
# sz == 10
VST2_s A1b 1111 0100 1 d 00 rn:4 vd:4 10 01 idx:2 0 idx0 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + ($idx & 1)) < 32; } \
        !memory { align(8); reg($rn); }
# only UNDEF case is sz=10, idx<1>=1
VST2_s A1c 1111 0100 1 d 00 rn:4 vd:4 10 01 idx:2 1 idx0 xm:4
# UNPREDICTABLE here is for d+inc+inc > 31
# sz == 00, 01
VST3_s A1a 1111 0100 1 d 00 rn:4 vd:4 0 sz 10 idx:3 0 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ((($idx << 1) >> $sz) & 1))) < 32; } \
        !memory { reg($rn); }
# sz == 10
VST3_s A1b 1111 0100 1 d 00 rn:4 vd:4 10 10 idx:2 00 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ($idx & 1))) < 32; } \
        !memory { reg($rn); }
# UNDEF: sz == 00, 01, idx<0> != 0
VST3_s A1c 1111 0100 1 d 00 rn:4 vd:4 0 sz 10 idx:3 1 xm:4
# UNDEF: sz == 10, idx<1:0> != 00
VST3_s A1d 1111 0100 1 d 00 rn:4 vd:4 10 10 idx:4 xm:4 \
        !constraints { ($idx & 3) != 0; }
# VST4 has an UNPREDICTABLE case for d+3*inc > 31.
# sz == 00, 01 (no UNDEF cases)
VST4_s A1a 1111 0100 1 d 00 rn:4 vd:4 0 sz 11 idx:4 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + (($idx & (1 << $sz)) >> 1))) < 32; } \
        !memory { align(8); reg($rn); }
# sz == 10
# BUGFIX: n-1 field was 01 (VST2, colliding with VST2_s A1b); VST4 must be 11.
VST4_s A1b 1111 0100 1 d 00 rn:4 vd:4 10 11 idx:2 0 idx0 xm:4 \
        !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + ($idx & 1))) < 32; } \
        !memory { align(16); reg($rn); }
# only UNDEF case is sz=10, idx<1:0>==11
# BUGFIX: same 01 -> 11 correction as VST4_s A1b.
VST4_s A1c 1111 0100 1 d 00 rn:4 vd:4 10 11 idx:2 11 xm:4
# VLD*, multiple single elements
# We separate these out by 'type' field
# type 0111: VLD1 regs=1
VLD1_m A1a 1111 0100 0 d 10 rn:4 vd:4 0111 sz:2 0 align xm:4 \
        !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 1) <= 32); } \
        !memory { align(8); reg($rn); }
# type 1010: VLD1 regs=2
VLD1_m A1b 1111 0100 0 d 10 rn:4 vd:4 1010 sz:2 align:2 xm:4 \
        !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2) <= 32) && ($align != 3); } \
        !memory { align(16); reg($rn); }
# type 0110: VLD1 regs=3
VLD1_m A1c 1111 0100 0 d 10 rn:4 vd:4 0110 sz:2 0 align xm:4 \
        !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 3) <= 32); } \
        !memory { align(8); reg($rn); }
# type 0010: VLD1 regs=4
VLD1_m A1d 1111 0100 0 d 10 rn:4 vd:4 0010 sz:2 align:2 xm:4 \
        !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 4) <= 32); } \
        !memory { align(32); reg($rn); }
# type 1000: VLD2 regs=1 inc=1
VLD2_m A1a 1111 0100 0 d 10 rn:4 vd:4 1000 sz:2 align:2 \
xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 1 + 1) <= 32) && ($align != 3) && ($sz != 3); } \ !memory { align(16); reg($rn); } # type 1001: VLD2 regs=1 inc=2 VLD2_m A1b 1111 0100 0 d 10 rn:4 vd:4 1001 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2 + 1) <= 32) && ($align != 3) && ($sz != 3); } \ !memory { align(16); reg($rn); } # type 0011: VLD2 regs=2 inc=2 VLD2_m A1c 1111 0100 0 d 10 rn:4 vd:4 0011 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2 + 2) <= 32) && ($sz != 3); } \ !memory { align(32); reg($rn); } # type 0100: VLD3 inc=1 VLD3_m A1a 1111 0100 0 d 10 rn:4 vd:4 0100 sz:2 0 align xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2) <= 31) && ($sz != 3); } \ !memory { align(8); reg($rn); } # type 0101: VLD3 inc=2 VLD3_m A1b 1111 0100 0 d 10 rn:4 vd:4 0101 sz:2 0 align xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 4) <= 31) && ($sz != 3); } \ !memory { align(8); reg($rn); } # type 0000: VLD4 inc=1 VLD4_m A1a 1111 0100 0 d 10 rn:4 vd:4 0000 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 3) <= 31) && ($sz != 3); } \ !memory { align(32); reg($rn); } # type 0001: VLD4 inc=2 VLD4_m A1b 1111 0100 0 d 10 rn:4 vd:4 0000 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 6) <= 31) && ($sz != 3); } \ !memory { align(32); reg($rn); } # UNDEF cases for all VLD*_m: # These are the patterns for VLD*_m UNDEFs on align bits being wrong # Conveniently the conditions for all the VLD* line up with the top # bits of the type field. 
# type = 01xx align = 1x VLDn_m A1a 1111 0100 0 d 10 rn:4 vd:4 01 type:2 sz:2 1 align xm:4 # type = 10xx align = 11 VLDn_m A1b 1111 0100 0 d 10 rn:4 vd:4 10 type:2 sz:2 11 xm:4 # VLD2/3/4 UNDEF on SZ=1; slightly tedious to pick these out # but they are type = xx0x or type = 0011 VLDn_m A1c 1111 0100 0 d 10 rn:4 vd:4 type:2 0 type0 11 align:2 xm:4 VLDn_m A1d 1111 0100 0 d 10 rn:4 vd:4 0011 11 align:2 xm:4 # VST*, multiple elements # We separate these out by 'type' field # type 0111: VST1 regs=1 VST1_m A1a 1111 0100 0 d 00 rn:4 vd:4 0111 sz:2 0 align xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 1) <= 32); } \ !memory { align(8); reg($rn); } # type 1010: VST1 regs=2 VST1_m A1b 1111 0100 0 d 00 rn:4 vd:4 1010 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2) <= 32) && ($align != 3); } \ !memory { align(16); reg($rn); } # type 0110: VST1 regs=3 VST1_m A1c 1111 0100 0 d 00 rn:4 vd:4 0110 sz:2 0 align xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 3) <= 32); } \ !memory { align(8); reg($rn); } # type 0010: VST1 regs=4 VST1_m A1d 1111 0100 0 d 00 rn:4 vd:4 0010 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 4) <= 32); } \ !memory { align(32); reg($rn); } # type 1000: VST2 regs=1 inc=1 VST2_m A1a 1111 0100 0 d 00 rn:4 vd:4 1000 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 1 + 1) <= 32) && ($align != 3) && ($sz != 3); } \ !memory { align(16); reg($rn); } # type 1001: VST2 regs=1 inc=2 VST2_m A1b 1111 0100 0 d 00 rn:4 vd:4 1001 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2 + 1) <= 32) && ($align != 3) && ($sz != 3); } \ !memory { align(16); reg($rn); } # type 0011: VST2 regs=2 inc=2 VST2_m A1c 1111 0100 0 d 00 rn:4 vd:4 0011 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2 + 2) <= 32) && ($sz != 3); } \ !memory { align(32); reg($rn); } # type 0100: VST3 inc=1 VST3_m A1a 1111 0100 0 d 00 rn:4 vd:4 0100 sz:2 0 align xm:4 \ 
!constraints { ($rn != $xm) && (((($d << 4)|$vd) + 2) <= 31) && ($sz != 3); } \ !memory { align(8); reg($rn); } # type 0101: VST3 inc=2 VST3_m A1b 1111 0100 0 d 00 rn:4 vd:4 0101 sz:2 0 align xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 4) <= 31) && ($sz != 3); } \ !memory { align(8); reg($rn); } # type 0000: VST4 inc=1 VST4_m A1a 1111 0100 0 d 00 rn:4 vd:4 0000 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 3) <= 31) && ($sz != 3); } \ !memory { align(32); reg($rn); } # type 0001: VST4 inc=2 VST4_m A1b 1111 0100 0 d 00 rn:4 vd:4 0000 sz:2 align:2 xm:4 \ !constraints { ($rn != $xm) && (((($d << 4)|$vd) + 6) <= 31) && ($sz != 3); } \ !memory { align(32); reg($rn); } # UNDEF cases for all VST*_m: # These are the patterns for VST*_m UNDEFs on align bits being wrong # Conveniently the conditions for all the VST* line up with the top # bits of the type field. # type = 01xx align = 1x VSTn_m A1a 1111 0100 0 d 10 rn:4 vd:4 01 type:2 sz:2 1 align xm:4 # type = 10xx align = 11 VSTn_m A1b 1111 0100 0 d 10 rn:4 vd:4 10 type:2 sz:2 11 xm:4 # VST2/3/4 UNDEF on SZ=1; slightly tedious to pick these out # but they are type = xx0x or type = 0011 VSTn_m A1c 1111 0100 0 d 10 rn:4 vd:4 type:2 0 type0 11 align:2 xm:4 VSTn_m A1d 1111 0100 0 d 10 rn:4 vd:4 0011 11 align:2 xm:4 # Unused spaces in the load/store instruction encoding space # (everything else is some sort of VLDn/VSTn) # A = 0 cases: # type = 11xx : always UNDEF VLDST_UNDEF A1a 1111 0100 0 any:2 0 any2:8 11 type:2 any3:8 # type = 1011 : always UNDEF VLDST_UNDEF A1b 1111 0100 0 any:2 0 any2:8 1011 any3:8 # A = 1 cases: only stores with B=11xx, all else is allocated VLDST_UNDEF A1c 1111 0100 1 x 0 0 any:8 11 any2:10 ########### Neon Data Processing ########################## # The following sets of patterns cover the whole of the # "Advanced SIMD data-processing instructions" space # as described in DDI0406B table A7-8 and the subtables # it refers to. 
########################################################### ########### Neon 3 reg same length ######################## # Instructions from the Neon "3 register same length" # space (table A7-9 in DDI0406B) # We include UNDEF combinations here; there are no # UNPREDICTABLE encodings we need to avoid. # We avoid size encodings that are fp16 under the v8.2-FP16 extension: # sz=1 for the float insns which have a 1-bit size field in bit 20, # and instead hard-wire that bit to 0. ########################################################### VHADD A1 1111 001 u 0 d sz:2 vn:4 vd:4 0000 n q m 0 vm:4 VHSUB A1 1111 001 u 0 d sz:2 vn:4 vd:4 0010 n q m 0 vm:4 VQADD A1 1111 001 u 0 d sz:2 vn:4 vd:4 0000 n q m 1 vm:4 VRHADD A1 1111 001 u 0 d sz:2 vn:4 vd:4 0001 n q m 0 vm:4 VAND A1 1111 0010 0 d 00 vn:4 vd:4 0001 n q m 1 vm:4 VBIC A1 1111 0010 0 d 01 vn:4 vd:4 0001 n q m 1 vm:4 # includes what the ARM ARM calls VMOV T1/A1, which is just VORR with vn=vm VORR A1 1111 0010 0 d 10 vn:4 vd:4 0001 n q m 1 vm:4 VORN A1 1111 0010 0 d 11 vn:4 vd:4 0001 n q m 1 vm:4 # VEOR, VBIF, VBIT and VBSL: VEOR_VBIT A1 1111 0011 0 d op:2 vn:4 vd:4 0001 n q m 1 vm:4 VQSUB A1 1111 001 u 0 d sz:2 vn:4 vd:4 0010 n q m 1 vm:4 VCGT A1 1111 001 u 0 d sz:2 vn:4 vd:4 0011 n q m 0 vm:4 VCGT A2 1111 0011 0 d 1 0 vn:4 vd:4 1110 n q m 0 vm:4 VCGE A1 1111 001 u 0 d sz:2 vn:4 vd:4 0011 n q m 1 vm:4 VCGE A2 1111 0011 0 d 0 0 vn:4 vd:4 1110 n q m 0 vm:4 VSHL A1 1111 001 u 0 d sz:2 vn:4 vd:4 0100 n q m 0 vm:4 VQSHL A1 1111 001 u 0 d sz:2 vn:4 vd:4 0100 n q m 1 vm:4 VRSHL A1 1111 001 u 0 d sz:2 vn:4 vd:4 0101 n q m 0 vm:4 VQRSHL A1 1111 001 u 0 d sz:2 vn:4 vd:4 0101 n q m 1 vm:4 VMAX A1 1111 001 u 0 d sz:2 vn:4 vd:4 0110 n q m 0 vm:4 VMIN A1 1111 001 u 0 d sz:2 vn:4 vd:4 0110 n q m 1 vm:4 VABD A1 1111 001 u 0 d sz:2 vn:4 vd:4 0111 n q m 0 vm:4 VABA A1 1111 001 u 0 d sz:2 vn:4 vd:4 0111 n q m 1 vm:4 VADD A1 1111 0010 0 d sz:2 vn:4 vd:4 1000 n q m 0 vm:4 VSUB A1 1111 0011 0 d sz:2 vn:4 vd:4 1000 n q m 0 vm:4 VTST A1 1111 
0010 0 d sz:2 vn:4 vd:4 1000 n q m 1 vm:4 VCEQ A1 1111 0011 0 d sz:2 vn:4 vd:4 1000 n q m 1 vm:4 VCEQ A2 1111 0010 0 d 0 0 vn:4 vd:4 1110 n q m 0 vm:4 VMLA A1 1111 001 op 0 d sz:2 vn:4 vd:4 1001 n q m 0 vm:4 VMUL A1 1111 001 op 0 d sz:2 vn:4 vd:4 1001 n q m 1 vm:4 VPMAX A1 1111 001 u 0 d sz:2 vn:4 vd:4 1010 n q m 0 vm:4 VPMIN A1 1111 001 u 0 d sz:2 vn:4 vd:4 1010 n q m 1 vm:4 VQDMULH A1 1111 0010 0 d sz:2 vn:4 vd:4 1011 n q m 0 vm:4 VQRDMULH A1 1111 0011 0 d sz:2 vn:4 vd:4 1011 n q m 0 vm:4 VPADD A1 1111 0010 0 d sz:2 vn:4 vd:4 1011 n q m 1 vm:4 # NB: VFM is VFPv4 only. There is no Neon encoding for VFNM. VFM A1 1111 0010 0 d op 0 vn:4 vd:4 1100 n q m 1 vm:4 VADD_float A1 1111 0010 0 d 0 0 vn:4 vd:4 1101 n q m 0 vm:4 VSUB_float A1 1111 0010 0 d 1 0 vn:4 vd:4 1101 n q m 0 vm:4 VPADD_float A1 1111 0011 0 d 0 0 vn:4 vd:4 1101 n q m 0 vm:4 VABD_float A1 1111 0011 0 d 1 0 vn:4 vd:4 1101 n q m 0 vm:4 VMLA_float A1 1111 0010 0 d 0 0 vn:4 vd:4 1101 n q m 1 vm:4 VMLS_float A1 1111 0010 0 d 1 0 vn:4 vd:4 1101 n q m 1 vm:4 VMUL_float A1 1111 0011 0 d 0 0 vn:4 vd:4 1101 n q m 1 vm:4 VACGE A1 1111 0011 0 d 0 0 vn:4 vd:4 1110 n q m 1 vm:4 VACGT A1 1111 0011 0 d 1 0 vn:4 vd:4 1110 n q m 1 vm:4 VMAX_float A1 1111 0010 0 d 0 0 vn:4 vd:4 1111 n q m 0 vm:4 VMIN_float A1 1111 0010 0 d 1 0 vn:4 vd:4 1111 n q m 0 vm:4 VPMAX_float A1 1111 0011 0 d 0 0 vn:4 vd:4 1111 n q m 0 vm:4 VPMIN_float A1 1111 0011 0 d 1 0 vn:4 vd:4 1111 n q m 0 vm:4 VRECPS A1 1111 0010 0 d 0 0 vn:4 vd:4 1111 n q m 1 vm:4 VRSQRTS A1 1111 0010 0 d 1 0 vn:4 vd:4 1111 n q m 1 vm:4 ########### Neon 1 reg + modified immediate ############### # Instructions from the Neon "1 reg + modified immediate" # space (table A7-14 in DDI0406B) # We include UNDEF combinations here. 
There are some # UNPREDICTABLEs in the constant encoding: # abcdefgh == 0 and cmode == 001x 010x 011x 101x 110x ########################################################### # We don't try to break these down into the separate # VMOV, VORR, VMVN, VBIC ops (which are encoded in 'op' # and 'cmode'); we just have a single pattern with a constraint # which avoids the UNPREDICTABLE space. Vimm A1 1111 001 imm1 1 d 000 imm3:3 vd:4 cmode:4 0 q op 1 imm4:4 \ !constraints { $imm1 != 0 || $imm3 != 0 || $imm4 != 0 || ($cmode & 0xe) == 0 || ($cmode & 0xe) == 8 || ($cmode & 0xe == 0xe); } ########### Neon 2 regs + shift ########################### # Instructions from the Neon "2 regs + shift" space # (table A7-12 in DDI0406B) # UNDEF cases included (but not generally the ones that # fall in gaps in the table). # NB L:imm == 0000xxx is in one-reg+modified-imm space. ########################################################### VSHR A1 1111 001 u 1 d imm:6 vd:4 0000 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VSRA A1 1111 001 u 1 d imm:6 vd:4 0001 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VRSHR A1 1111 001 u 1 d imm:6 vd:4 0010 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VRSRA A1 1111 001 u 1 d imm:6 vd:4 0011 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VSRI A1 1111 0011 1 d imm:6 vd:4 0100 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VSHL_imm A1 1111 0010 1 d imm:6 vd:4 0101 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VSLI A1 1111 0011 1 d imm:6 vd:4 0101 l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } VQSHL_imm A1 1111 001 u 1 d imm:6 vd:4 011 op l q m 1 vm:4 { $l != 0 || ($imm & 0x38) != 0; } # this includes VSHRN (if U=0 and op=0) VQSHRN A1 1111 001 u 1 d imm:6 vd:4 100 op 0 0 m 1 vm:4 { ($imm & 0x38) != 0; } # this includes VRSHRN (if U=0 and op=0) VQRSHRN A1 1111 001 u 1 d imm:6 vd:4 100 op 0 1 m 1 vm:4 { ($imm & 0x38) != 0; } # includes VMOVL where the shift amount is zero VSHLL A1 1111 001 u 1 d imm:6 vd:4 1010 0 0 m 1 vm:4 { ($imm & 0x38) != 0; } 
VCVT A1 1111 001 u 1 d imm:6 vd:4 111 op 0 q m 1 vm:4 { ($imm & 0x38) != 0; } ########### Neon 3 regs different lengths ################# # Instructions from the Neon "3 regs different lengths" # space (table A7-10 in DDI0406B) # UNDEF cases included. # sz = 11 is in vext/vtbl/vtbx/vdup/2reg-misc space. ########################################################### VADDL A1 1111 001 u 1 d sz:2 vn:4 vd:4 000 0 n 0 m 0 vm:4 { $sz != 3; } VADDW A1 1111 001 u 1 d sz:2 vn:4 vd:4 000 1 n 0 m 0 vm:4 { $sz != 3; } VSUBL A1 1111 001 u 1 d sz:2 vn:4 vd:4 001 0 n 0 m 0 vm:4 { $sz != 3; } VSUBW A1 1111 001 u 1 d sz:2 vn:4 vd:4 001 1 n 0 m 0 vm:4 { $sz != 3; } VADDHN A1 1111 0010 1 d sz:2 vn:4 vd:4 0100 n 0 m 0 vm:4 { $sz != 3; } VRADDHN A1 1111 0011 1 d sz:2 vn:4 vd:4 0100 n 0 m 0 vm:4 { $sz != 3; } VABAL A2 1111 001 u 1 d sz:2 vn:4 vd:4 0101 n 0 m 0 vm:4 { $sz != 3; } VSUBHN A1 1111 0010 1 d sz:2 vn:4 vd:4 0110 n 0 m 0 vm:4 { $sz != 3; } VRSUBHN A1 1111 0011 1 d sz:2 vn:4 vd:4 0110 n 0 m 0 vm:4 { $sz != 3; } VABDL A2 1111 001 u 1 d sz:2 vn:4 vd:4 0111 n 0 m 0 vm:4 { $sz != 3; } VMLAL A2 1111 001 u 1 d sz:2 vn:4 vd:4 10 0 0 n 0 m 0 vm:4 { $sz != 3; } VMLSL A2 1111 001 u 1 d sz:2 vn:4 vd:4 10 1 0 n 0 m 0 vm:4 { $sz != 3; } VQDMLAL A1 1111 0010 1 d sz:2 vn:4 vd:4 10 0 1 n 0 m 0 vm:4 { $sz != 3; } VQDMLSL A1 1111 0010 1 d sz:2 vn:4 vd:4 10 1 1 n 0 m 0 vm:4 { $sz != 3; } VMULL A2 1111 001 u 1 d sz:2 vn:4 vd:4 11 op 0 n 0 m 0 vm:4 { $sz != 3; } VQDMULL A1 1111 0010 1 d sz:2 vn:4 vd:4 1101 n 0 m 0 vm:4 { $sz != 3; } ########### Neon 2 regs + scalar ########################## # Instructions from the Neon "2 regs + scalar" space # (table A7-11 in DDI0406B) # UNDEF cases included. # sz = 11 is in vext/vtbl/vtbx/vdup/2reg-misc space. 
# We avoid f=1 sz=01 which is v8.2-FP16 ########################################################### # includes float variants VMLA_scalar A1 1111 001 q 1 d sz:2 vn:4 vd:4 0 0 0 f n 1 m 0 vm:4 \ { $sz != 3 && ($f == 0 || $sz != 1); } VMLS_scalar A1 1111 001 q 1 d sz:2 vn:4 vd:4 0 1 0 f n 1 m 0 vm:4 \ { $sz != 3 && ($f == 0 || $sz != 1); } VMLAL_scalar A2 1111 001 u 1 d sz:2 vn:4 vd:4 0 0 1 0 n 1 m 0 vm:4 { $sz != 3; } VMLSL_scalar A2 1111 001 u 1 d sz:2 vn:4 vd:4 0 1 1 0 n 1 m 0 vm:4 { $sz != 3; } VQDMLAL_scalar A2 1111 0010 1 d sz:2 vn:4 vd:4 0 0 11 n 1 m 0 vm:4 { $sz != 3; } VQDMLSL_scalar A2 1111 0010 1 d sz:2 vn:4 vd:4 0 1 11 n 1 m 0 vm:4 { $sz != 3; } VMUL_scalar A1 1111 001 q 1 d sz:2 vn:4 vd:4 100 f n 1 m 0 vm:4 \ { $sz != 3 && ($f == 0 || $sz != 1); } VMULL_scalar A2 1111 001 u 1 d sz:2 vn:4 vd:4 1010 n 1 m 0 vm:4 { $sz != 3; } VQDMULL_scalar A2 1111 0010 1 d sz:2 vn:4 vd:4 1011 n 1 m 0 vm:4 { $sz != 3; } VQDMULH_scalar A2 1111 001 q 1 d sz:2 vn:4 vd:4 1100 n 1 m 0 vm:4 { $sz != 3; } VQRDMULH_scalar A2 1111 001 q 1 d sz:2 vn:4 vd:4 1101 n 1 m 0 vm:4 { $sz != 3; } ########### Neon 2 regs miscellaneous ##################### # Instructions from the Neon "2 regs miscellaneous" space # (table A7-13 in DDI0406B) # UNDEF cases included. 
# We avoid f=1 sz=01 which is v8.2-FP16 ########################################################### VREV A1 1111 0011 1 d 11 sz:2 00 vd:4 000 op:2 q m 0 vm:4 VPADDL A1 1111 0011 1 d 11 sz:2 00 vd:4 0010 op q m 0 vm:4 VCLS A1 1111 0011 1 d 11 sz:2 00 vd:4 0 1000 q m 0 vm:4 VCLZ A1 1111 0011 1 d 11 sz:2 00 vd:4 0 1001 q m 0 vm:4 VCNT A1 1111 0011 1 d 11 sz:2 00 vd:4 0 1010 q m 0 vm:4 VMVN A1 1111 0011 1 d 11 sz:2 00 vd:4 0 1011 q m 0 vm:4 VPADAL A1 1111 0011 1 d 11 sz:2 00 vd:4 0110 op q m 0 vm:4 VQABS A1 1111 0011 1 d 11 sz:2 00 vd:4 0111 0 q m 0 vm:4 VQNEG A1 1111 0011 1 d 11 sz:2 00 vd:4 0111 1 q m 0 vm:4 VCGT0 A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 000 q m 0 vm:4 { $f == 0 || $sz != 1; } VCGE0 A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 001 q m 0 vm:4 { $f == 0 || $sz != 1; } VCEQ0 A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 010 q m 0 vm:4 { $f == 0 || $sz != 1; } VCLE0 A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 011 q m 0 vm:4 { $f == 0 || $sz != 1; } VCLT0 A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 100 q m 0 vm:4 { $f == 0 || $sz != 1; } VABS A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 110 q m 0 vm:4 { $f == 0 || $sz != 1; } VNEG A1 1111 0011 1 d 11 sz:2 01 vd:4 0 f 111 q m 0 vm:4 { $f == 0 || $sz != 1; } VSWP A1 1111 0011 1 d 11 sz:2 10 vd:4 00000 q m 0 vm:4 # d == m gives UNKNOWN results, so avoid it VTRN A1 1111 0011 1 d 11 sz:2 10 vd:4 00001 q m 0 vm:4 { ($d != $m) || ($vd != $vm); } # d == m gives UNKNOWN results, so avoid it VUZP A1 1111 0011 1 d 11 sz:2 10 vd:4 00010 q m 0 vm:4 { ($d != $m) || ($vd != $vm); } VZIP A1 1111 0011 1 d 11 sz:2 10 vd:4 00011 q m 0 vm:4 { ($d != $m) || ($vd != $vm); } # includes VMOVN, VQMOVUN VQMOVN A1 1111 0011 1 d 11 sz:2 10 vd:4 0010 op:2 m 0 vm:4 VSHLL A2 1111 0011 1 d 11 sz:2 10 vd:4 0011 0 0 m 0 vm:4 # float-halfprec (A8.6.299) # NB that half-precision needs at least an A9; A8 doesn't have it VCVT_half A1 1111 0011 1 d 11 sz:2 10 vd:4 011 op 0 0 m 0 vm:4 VRECPE A1 1111 0011 1 d 11 sz:2 11 vd:4 010 f 0 q m 0 vm:4 { $f == 0 || $sz != 1; } VRSQRTE A1 1111 
0011 1 d 11 sz:2 11 vd:4 010 f 1 q m 0 vm:4 { $f == 0 || $sz != 1; } # float to int, neon versions (A8.6.294); avoid sz=01 which is FP16 VCVT_neon A1 1111 0011 1 d 11 sz:2 11 vd:4 0 11 op:2 q m 0 vm:4 { $sz != 1; } ########### Neon other #################################### # Instructions which have their own entry in the top level # Neon data processing instructions decode table A7-8: # VEXT, VTBL, VTBX, VDUP (scalar) ########################################################### VEXT A1 1111 0010 1 d 11 vn:4 vd:4 imm:4 n q m 0 vm:4 # VTBL and VTBX; n + length > 32 is UNPREDICTABLE VTBL A1 1111 0011 1 d 11 vn:4 vd:4 1 0 len:2 n op m 0 vm:4 \ !constraints { (($n << 4) | $vn) + $len + 1 <= 32; } VDUP_scalar A1 1111 0011 1 d 11 imm:4 vd:4 11000 q m 0 vm:4 ########################################################### # End of Neon Data Processing instruction patterns. ########################################################### ########### VFP Data Processing ########################### # The following sets of patterns cover the whole of the # "VFP data-processing instructions" space # as described in DDI0406B table A7-16 and the subtables # it refers to. # These don't include fp16, which has [11:9] 0b100 # (described in the Arm ARM as [11:9] 0b10 and a 2-bit size field) ########################################################### # VMLA, VMLS VMLA A2 cond:4 11100 d 00 vn:4 vd:4 101 sz n op m 0 vm:4 # VNMLA, VNMLS VNMLA A1 cond:4 11100 d 01 vn:4 vd:4 101 sz n op m 0 vm:4 VNMUL A2 cond:4 11100 d 10 vn:4 vd:4 101 sz n 1 m 0 vm:4 VMUL A2 cond:4 11100 d 10 vn:4 vd:4 101 sz n 0 m 0 vm:4 VADD A2 cond:4 11100 d 11 vn:4 vd:4 101 sz n 0 m 0 vm:4 VSUB A2 cond:4 11100 d 11 vn:4 vd:4 101 sz n 1 m 0 vm:4 VDIV A1 cond:4 11101 d 00 vn:4 vd:4 101 sz n 0 m 0 vm:4 # Other VFP data processing instructions (opc1 1x11) # We don't check that SBO/SBZ bits here UNDEF if wrong. 
VMOV_imm A2 cond:4 11101 d 11 immh:4 vd:4 101 sz 0000 imml:4 VMOV A2 cond:4 11101 d 11 0000 vd:4 101 sz 0 1 m 0 vm:4 VABS A2 cond:4 11101 d 11 0000 vd:4 101 sz 1 1 m 0 vm:4 VNEG A2 cond:4 11101 d 11 0001 vd:4 101 sz 0 1 m 0 vm:4 VSQRT A1 cond:4 11101 d 11 0001 vd:4 101 sz 1 1 m 0 vm:4 # VCVTB, VCVTT (A8.6.300) [requires half-precision extension] VCVT_B_TT A1 cond:4 1110 1 d 11 001 op vd:4 101 0 t 1 m 0 vm:4 VCMP A1 cond:4 11101 d 11 0100 vd:4 101 sz e 1 m 0 vm:4 VCMP A2 cond:4 11101 d 11 0101 vd:4 101 sz e 1 0 0 0000 # VCVT between double and single (A8.6.298) VCVT_298 A1 cond:4 1110 1 d 11 0111 vd:4 101 sz 1 1 m 0 vm:4 # VCVT between fp and int: split in two because opc2 must be 000 or 10x (A8.6.295) VCVT_a A1 cond:4 11101 d 111 000 vd:4 101 sz op 1 m 0 vm:4 VCVT_b A1 cond:4 11101 d 111 10 x vd:4 101 sz op 1 m 0 vm:4 # VCVT between fp and fixed point (A.8.6.297) # Ugh. UNPREDICTABLE unless the 32 bit int formed by imm4:i is at least # 16 (if sx is 0) or 32 (if sx is 1). That is, if sx==0 then either # bit 3 must be 0 or bits 2..0 and 5 must be 0. 
# sx==1 case first:
VCVT_c A1 cond:4 11101 d 111 op 1 u vd:4 101 sf 1 1 i 0 imm:4
# sx==0, bit 3 == 0
VCVT_d A1 cond:4 11101 d 111 op 1 u vd:4 101 sf 0 1 i 0 0 imm:3
# sx==0, bit 3 == 1, bits 2..0 and 5 0
VCVT_e A1 cond:4 11101 d 111 op 1 u vd:4 101 sf 0 1 0 0 1000
# VFPv4 fused multiply-add
VFM A2 cond:4 11101 d 10 vn:4 vd:4 101 sz n op m 0 vm:4
VFNM A1 cond:4 11101 d 01 vn:4 vd:4 101 sz n op m 0 vm:4

# UNDEF patterns in VFP data processing space (not currently checked):
# opc1 1x00 opc3 x1
# opc1 1x11 opc2 0110 opc3 x1
# opc1 1x11 opc2 0111 opc3 01
# opc1 1x11 opc2 1001 opc3 x1

########### Extension register load/store #################
# The following sets of patterns cover:
# 'extension register load/store insns' (A7.6)
# as described in DDI0406B
###########################################################

# Note that the ARM ARM treats VPUSH/VPOP as special cases
# but in fact they are the same as VLDM/VSTM

# VSTM of 64 bit regs
# NB that the constraints on imm are to avoid UNPREDICTABLEs.
# We force the high bits of imm to 0 in the pattern to avoid
# pointlessly generating things that fail the constraint anyway.
# postincrement (U == 1)
VSTM A1a cond:4 110 p 1 d w 0 rn:4 vd:4 1011 00 imm:5 x \
     !constraints { $p != 1 && $imm != 0 && $imm <= 16 && ((($d << 4) | $vd) + $imm) <= 32; } \
     !memory { reg($rn); }
# predecrement (U == 0)
VSTM A1b cond:4 110 p 0 d w 0 rn:4 vd:4 1011 00 imm:5 x \
     !constraints { $p == 1 && $w == 1 && $imm != 0 && $imm <= 16 && ((($d << 4) | $vd) + $imm) <= 32; } \
     !memory { reg_minus_imm($rn, $imm * 8); }
# VSTM of 32 bit regs
# postincrement (U == 1)
VSTM A2a cond:4 110 p 1 d w 0 rn:4 vd:4 1010 00 imm:6 \
     !constraints { $p != 1 && $imm != 0 && ((($vd << 1) | $d) + $imm) <= 32; } \
     !memory { reg($rn); }
# predecrement (U == 0)
VSTM A2b cond:4 110 p 0 d w 0 rn:4 vd:4 1010 00 imm:6 \
     !constraints { $p == 1 && $w == 1 && $imm != 0 && ((($vd << 1) | $d) + $imm) <= 32; } \
     !memory { reg_minus_imm($rn, $imm * 8); }
# UNDEF cases for both A1 and A2: P==U && W==1
VSTM A1c cond:4 110 p u d 1 0 rn:4 vd:4 101 x imm:8 !constraints { $p == $u; }
# VLDM
# postincrement (U == 1)
VLDM A1a cond:4 110 p 1 d w 1 rn:4 vd:4 1011 00 imm:5 x \
     !constraints { $p != 1 && $imm != 0 && $imm <= 16 && ((($d << 4) | $vd) + $imm) <= 32; } \
     !memory { reg($rn); }
# predecrement (U == 0)
VLDM A1b cond:4 110 p 0 d w 1 rn:4 vd:4 1011 00 imm:5 x \
     !constraints { $p == 1 && $w == 1 && $imm != 0 && $imm <= 16 && ((($d << 4) | $vd) + $imm) <= 32; } \
     !memory { reg_minus_imm($rn, $imm * 8); }
# VLDM of 32 bit regs
# postincrement (U == 1)
VLDM A2a cond:4 110 p 1 d w 1 rn:4 vd:4 1010 00 imm:6 \
     !constraints { $p != 1 && $imm != 0 && ((($vd << 1) | $d) + $imm) <= 32; } \
     !memory { reg($rn); }
# predecrement (U == 0)
VLDM A2b cond:4 110 p 0 d w 1 rn:4 vd:4 1010 00 imm:6 \
     !constraints { $p == 1 && $w == 1 && $imm != 0 && ((($vd << 1) | $d) + $imm) <= 32; } \
     !memory { reg_minus_imm($rn, $imm * 8); }
# UNDEF cases for both A1 and A2: P==U && W==1
VLDM A1c cond:4 110 p u d 1 1 rn:4 vd:4 101 x imm:8 !constraints { $p == $u; }
# VSTR (no overlap with VSTR_f16)
# both A1 and A2 encodings, U = 1
VSTR A1a cond:4 1101 1 d 00 rn:4 vd:4 101 x imm:8 \
     !memory { reg_plus_imm($rn, $imm * 4); }
# both A1 and A2 encodings, U = 0
VSTR A1b cond:4 1101 0 d 00 rn:4 vd:4 101 x imm:8 \
     !memory { reg_minus_imm($rn, $imm * 4); }
# VLDR (no overlap with VLDR_f16)
# both A1 and A2 encodings, U = 1
VLDR A1a cond:4 1101 1 d 01 rn:4 vd:4 101 x imm:8 \
     !memory { reg_plus_imm($rn, $imm * 4); }
# both A1 and A2 encodings, U = 0
VLDR A1b cond:4 1101 0 d 01 rn:4 vd:4 101 x imm:8 \
     !memory { reg_minus_imm($rn, $imm * 4); }

########### Extension register transfer ###################
# The following sets of patterns cover:
# '8, 16 and 32-bit transfer between ARM core and
# extension registers' (A7.8)
# as described in DDI0406C
# with the exception of VMSR/VMRS.
###########################################################

VMOV_core_single A1 cond:4 1110 000 op:1 vd:4 rt:4 1010 n:1 0010000
VMOV_core_scalar A1 cond:4 1110 0 opc:2 0 vd:4 rt:4 1011 d:1 opc2:2 10000
VMOV_scalar_core A1 cond:4 1110 u:1 opc:2 1 vn:4 rt:4 1011 n:1 opc2:2 10000
# vector duplicate (reg)
# b:e == 11 UNDEF
VDUP A1a cond:4 1110 1 b 1 0 vd:3 0 rt:4 1011 d 0 e 1 0000 { ($b == 0) || ($e == 0); }
VDUP A1b cond:4 1110 1 b 0 0 vd:4 rt:4 1011 d 0 e 1 0000 { ($b == 0) || ($e == 0); }

########### Extension register transfer ###################
# The following sets of patterns cover:
# '64-bit transfers between ARM core and extension
# registers' (A7.8)
# as described in DDI0406C
###########################################################

VMOV_core_2single A1 cond:4 1100 010 op:1 rt2:4 rt:4 1010 00 m:1 1 vm:4 { ($vm != 0xf || $m != 1) && ($op == 0 || $rt2 != $rt); }
VMOV_core_double A1 cond:4 1100 010 op:1 rt2:4 rt:4 1011 00 m:1 1 vm:4 { $op == 0 || $rt2 != $rt; }

#####
# v8 only insns

# VSEL
VSEL A1 1111 11100 d cc:2 vn:4 vd:4 101 sz n 0 m 0 vm:4
# VMINNM and VMAXNM
# neon: sz=0 (avoiding sz=1 which is FP16)
VMINMAXNM A1 1111 00110 d op 0 vn:4 vd:4 1111 n q m 1 vm:4
# vfp (does not overlap with FP16)
VMINMAXNM A2 1111 11101 d 00 vn:4 vd:4 101 sz n op m 0 vm:4

# Crypto
# AESD, AESE, AESIMC, AESMC
AESD A1 1111 0011 1 d 11 sz:2 00 vd:4 0011 01 m 0 vm:4
AESE A1 1111 0011 1 d 11 sz:2 00 vd:4 0011 00 m 0 vm:4
AESIMC A1 1111 0011 1 d 11 sz:2 00 vd:4 0011 11 m 0 vm:4
AESMC A1 1111 0011 1 d 11 sz:2 00 vd:4 0011 10 m 0 vm:4
# SHA1, SHA256
SHA1C A1 1111 00100 d 00 vn:4 vd:4 1100 n q m 0 vm:4
SHA1M A1 1111 00100 d 10 vn:4 vd:4 1100 n q m 0 vm:4
SHA1P A1 1111 00100 d 01 vn:4 vd:4 1100 n q m 0 vm:4
SHA1SU0 A1 1111 00100 d 11 vn:4 vd:4 1100 n q m 0 vm:4
SHA256H A1 1111 00110 d 00 vn:4 vd:4 1100 n q m 0 vm:4
SHA256H2 A1 1111 00110 d 01 vn:4 vd:4 1100 n q m 0 vm:4
SHA256SU1 A1 1111 00110 d 10 vn:4 vd:4 1100 n q m 0 vm:4
SHA1H A1 1111 00111 d 11 sz:2 01 vd:4 0 01011 m 0 vm:4
SHA1SU1 A1 1111 00111 d 11 sz:2 10 vd:4 0 01110 m 0 vm:4
SHA256SU0 A1 1111 00111 d 11 sz:2 10 vd:4 0 01111 m 0 vm:4
# long polynomial multiply: op == 1, sz == 10
VMULL_P64 A2 1111 001 u 1 d 10 vn:4 vd:4 11 1 0 n 0 m 0 vm:4

# rounding, VFP: VRINTX, VRINTZ, VRINTR, VRINT[ANPM]
VRINTX A1 cond:4 11101 d 110111 vd:4 101 sz 0 1 m 0 vm:4
VRINTZR A1 cond:4 11101 d 110110 vd:4 101 sz op 1 m 0 vm:4
VRINTANPM A1 1111 11101 d 1110 rmode:2 vd:4 101 sz 0 1 m 0 vm:4
# rounding, Neon:
VRINTX_neon A1 1111 00111 d 11 sz:2 10 vd:4 01001 q m 0 vm:4
VRINTZ_neon A1 1111 00111 d 11 sz:2 10 vd:4 01011 q m 0 vm:4
VRINTANPM_neon A1 1111 00111 d 11 sz:2 10 vd:4 01 op:3 q m 0 vm:4
# VCVT with rounding mode specified
VCVT_rm A1 1111 11101 d 1111 rm:2 vd:4 101 sz op 1 m 0 vm:4
# 64<->16 conversions (see also pattern earlier which is the sz==0 case)
VCVT_B_TT_64 A1 cond:4 1110 1 d 11 001 op vd:4 101 1 t 1 m 0 vm:4
# VCVT with rounding mode specified, neon; avoid sz=0b01 which is FP16
VCVT_rm_neon A1 1111 00111 d 11 size:2 11 vd:4 00 rm:2 op q m 0 vm:4 { $size != 1; }

# CRC
# Note that sz == 0b11 is UNPREDICTABLE (either UNDEF, NOP or as if == 0b10)
# as is cond != 1110 (either UNDEF, NOP, cond-exec or unconditional exec)
CRC32 A1 1110 00010 sz:2 0 rn:4 rd:4 00 c 0 0100 rm:4 !constraints { $sz != 3; }

#
# ARMv8.1 extensions
#
@v8_1_simd
VQRDMLAH A1 111100110 d:1 size:2 vn:4 vd:4 1011 n:1 q:1 m:1 1 vm:4
VQRDMLAH_s A1 1111001 q:1 1 d:1 size:2 vn:4 vd:4 1110 n:1 1 m:1 0 vm:4
VQRDMLSH A1 111100110 d:1 size:2 vn:4 vd:4 1100 n:1 q:1 m:1 1 vm:4
VQRDMLSH_s A1 1111001 q:1 1 d:1 size:2 vn:4 vd:4 1111 n:1 1 m:1 0 vm:4

#
# ARMv8.2 extensions
#
@v8_2_dp
VUDOT A1 1111110 00 d:1 10 vn:4 vd:4 1101 n:1 q:1 m:1 1 vm:4
VSDOT A1 1111110 00 d:1 10 vn:4 vd:4 1101 n:1 q:1 m:1 0 vm:4
VUDOT_s A1 11111110 0 d:1 10 vn:4 vd:4 1101 n:1 q:1 m:1 1 vm:4
VSDOT_s A1 11111110 0 d:1 10 vn:4 vd:4 1101 n:1 q:1 m:1 0 vm:4

@v8_2_fhm
VFMAL A1 1111110 00 d:1 10 vn:4 vd:4 1000 n:1 q:1 m:1 1 vm:4
VFMSL A1 1111110 01 d:1 10 vn:4 vd:4 1000 n:1 q:1 m:1 1 vm:4
VFMAL_s A1 11111110 0 d:1 00 vn:4 vd:4 1000 n:1 q:1 m:1 1 vm:4
VFMSL_s A1 11111110 1 d:1 00 vn:4 vd:4 1000 n:1 q:1 m:1 1 vm:4

@v8_2_fp16
# v8.2-FP16 adds a lot of "and 16-bit flavour" to existing insn encodings;
# these patterns are arranged in the same order as the earlier v7 patterns.
# Pattern names follow the non-fp16 names with a _f16 suffix.
# FP16: neon 3-reg-same: bit 20 sz=1 for f16
VCGT_f16 A2 1111 0011 0 d 1 1 vn:4 vd:4 1110 n q m 0 vm:4
VCGE_f16 A2 1111 0011 0 d 0 1 vn:4 vd:4 1110 n q m 0 vm:4
VCEQ_f16 A2 1111 0010 0 d 0 1 vn:4 vd:4 1110 n q m 0 vm:4
VFM_f16 A1 1111 0010 0 d op 1 vn:4 vd:4 1100 n q m 1 vm:4
VADD_float_f16 A1 1111 0010 0 d 0 1 vn:4 vd:4 1101 n q m 0 vm:4
VSUB_float_f16 A1 1111 0010 0 d 1 1 vn:4 vd:4 1101 n q m 0 vm:4
VPADD_float_f16 A1 1111 0011 0 d 0 1 vn:4 vd:4 1101 n q m 0 vm:4
VABD_float_f16 A1 1111 0011 0 d 1 1 vn:4 vd:4 1101 n q m 0 vm:4
VMLA_float_f16 A1 1111 0010 0 d 0 1 vn:4 vd:4 1101 n q m 1 vm:4
VMLS_float_f16 A1 1111 0010 0 d 1 1 vn:4 vd:4 1101 n q m 1 vm:4
VMUL_float_f16 A1 1111 0011 0 d 0 1 vn:4 vd:4 1101 n q m 1 vm:4
VACGE_f16 A1 1111 0011 0 d 0 1 vn:4 vd:4 1110 n q m 1 vm:4
VACGT_f16 A1 1111 0011 0 d 1 1 vn:4 vd:4 1110 n q m 1 vm:4
VMAX_float_f16 A1 1111 0010 0 d 0 1 vn:4 vd:4 1111 n q m 0 vm:4
VMIN_float_f16 A1 1111 0010 0 d 1 1 vn:4 vd:4 1111 n q m 0 vm:4
VPMAX_float_f16 A1 1111 0011 0 d 0 1 vn:4 vd:4 1111 n q m 0 vm:4
VPMIN_float_f16 A1 1111 0011 0 d 1 1 vn:4 vd:4 1111 n q m 0 vm:4
VRECPS_f16 A1 1111 0010 0 d 0 1 vn:4 vd:4 1111 n q m 1 vm:4
VRSQRTS_f16 A1 1111 0010 0 d 1 1 vn:4 vd:4 1111 n q m 1 vm:4

# FP16: neon 2-reg-scalar : f=1 sz=01
VMLA_scalar_f16 A1 1111 001 q 1 d 01 vn:4 vd:4 0 0 0 f n 1 m 0 vm:4
VMLS_scalar_f16 A1 1111 001 q 1 d 01 vn:4 vd:4 0 1 0 f n 1 m 0 vm:4
VMUL_scalar_f16 A1 1111 001 q 1 d 01 vn:4 vd:4 100 f n 1 m 0 vm:4

# FP16: Neon 2-reg-shift
# this doesn't overlap with the non-fp16 insn, which has 111 in [11:9]
VCVT_f16 A1 1111 001 u 1 d imm:6 vd:4 110 op 0 q m 1 vm:4 { ($imm & 0x38) != 0; }

# FP16: neon 2-reg-misc: f=1 sz=01
VCGT0_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 000 q m 0 vm:4
VCGE0_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 001 q m 0 vm:4
VCEQ0_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 010 q m 0 vm:4
VCLE0_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 011 q m 0 vm:4
VCLT0_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 100 q m 0 vm:4
VABS_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 110 q m 0 vm:4
VNEG_f16 A1 1111 0011 1 d 11 01 01 vd:4 0 f 111 q m 0 vm:4
VRECPE_f16 A1 1111 0011 1 d 11 01 11 vd:4 010 f 0 q m 0 vm:4
VRSQRTE_f16 A1 1111 0011 1 d 11 01 11 vd:4 010 f 1 q m 0 vm:4
VCVT_neon_f16 A1 1111 0011 1 d 11 01 11 vd:4 0 11 op:2 q m 0 vm:4

# FP16: vfp: these have no overlap with non-fp16 patterns, where [11:9] is 101
VMLA_f16 A2 cond:4 11100 d 00 vn:4 vd:4 1001 n op m 0 vm:4
VNMLA_f16 A1 cond:4 11100 d 01 vn:4 vd:4 1001 n op m 0 vm:4
VNMUL_f16 A2 cond:4 11100 d 10 vn:4 vd:4 1001 n 1 m 0 vm:4
VMUL_f16 A2 cond:4 11100 d 10 vn:4 vd:4 1001 n 0 m 0 vm:4
VADD_f16 A2 cond:4 11100 d 11 vn:4 vd:4 1001 n 0 m 0 vm:4
VSUB_f16 A2 cond:4 11100 d 11 vn:4 vd:4 1001 n 1 m 0 vm:4
VDIV_f16 A1 cond:4 11101 d 00 vn:4 vd:4 1001 n 0 m 0 vm:4
VMOV_imm_f16 A2 cond:4 11101 d 11 immh:4 vd:4 1001 0000 imml:4
VABS_f16 A2 cond:4 11101 d 11 0000 vd:4 1001 1 1 m 0 vm:4
VNEG_f16 A2 cond:4 11101 d 11 0001 vd:4 1001 0 1 m 0 vm:4
VSQRT_f16 A1 cond:4 11101 d 11 0001 vd:4 1001 1 1 m 0 vm:4
VCMP_f16 A1 cond:4 11101 d 11 0100 vd:4 1001 e 1 m 0 vm:4
VCMP_f16 A2 cond:4 11101 d 11 0101 vd:4 1001 e 1 0 0 0000
VCVT_a_f16 A1 cond:4 11101 d 111 000 vd:4 1001 op 1 m 0 vm:4
VCVT_b_f16 A1 cond:4 11101 d 111 10 x vd:4 1001 op 1 m 0 vm:4
# VCVT between fp and fixed point (A.8.6.297); same UNPREDICTABLE as non-fp16
# sx==1 case first:
VCVT_c_f16 A1 cond:4 11101 d 111 op 1 u vd:4 1001 1 1 i 0 imm:4
# sx==0, bit 3 == 0
VCVT_d_f16 A1 cond:4 11101 d 111 op 1 u vd:4 1001 0 1 i 0 0 imm:3
# sx==0, bit 3 == 1, bits 2..0 and 5 0
VCVT_e_f16 A1 cond:4 11101 d 111 op 1 u vd:4 1001 0 1 0 0 1000
VFM_f16 A2 cond:4 11101 d 10 vn:4 vd:4 1001 n op m 0 vm:4
VFNM_f16 A1 cond:4 11101 d 01 vn:4 vd:4 1001 n op m 0 vm:4
# both A1 and A2 encodings, U = 1
VSTR_f16 A1a cond:4 1101 1 d 00 rn:4 vd:4 1001 imm:8 \
     !memory { reg_plus_imm($rn, $imm * 2); }
# both A1 and A2 encodings, U = 0
VSTR_f16 A1b cond:4 1101 0 d 00 rn:4 vd:4 1001 imm:8 \
     !memory { reg_minus_imm($rn, $imm * 2); }
# both A1 and A2 encodings, U = 1
VLDR_f16 A1a cond:4 1101 1 d 01 rn:4 vd:4 1001 imm:8 \
     !memory { reg_plus_imm($rn, $imm * 2); }
# both A1 and A2 encodings, U = 0
VLDR_f16 A1b cond:4 1101 0 d 01 rn:4 vd:4 1001 imm:8 \
     !memory { reg_minus_imm($rn, $imm * 2); }

# FP16: v8-only insns
# Neon insns with sz=0b01
VCVT_rm_neon_f16 A1 1111 00111 d 11 01 11 vd:4 00 rm:2 op q m 0 vm:4
VRINTX_neon_f16 A1 1111 00111 d 11 01 10 vd:4 01001 q m 0 vm:4
VRINTZ_neon_f16 A1 1111 00111 d 11 01 10 vd:4 01011 q m 0 vm:4
VRINTANPM_neon_f16 A1 1111 00111 d 11 01 10 vd:4 01 op:3 q m 0 vm:4
# Neon insn with sz=1
VMINMAXNM_f16 A1 1111 00110 d op 1 vn:4 vd:4 1111 n q m 1 vm:4
# VFP insns which don't overlap non-fp16 rules (which have 101 in [11:9])
VCVT_rm_f16 A1 1111 11101 d 1111 rm:2 vd:4 1001 op 1 m 0 vm:4
VSEL_f16 A1 1111 11100 d cc:2 vn:4 vd:4 1001 n 0 m 0 vm:4
VMINMAXNM_f16 A2 1111 11101 d 00 vn:4 vd:4 1001 n op m 0 vm:4
VRINTX_f16 A1 cond:4 11101 d 110111 vd:4 1001 0 1 m 0 vm:4
VRINTZR_f16 A1 cond:4 11101 d 110110 vd:4 1001 op 1 m 0 vm:4
VRINTANPM_f16 A1 1111 11101 d 1110 rmode:2 vd:4 1001 0 1 m 0 vm:4
# FP16: Insns which are new for v8.2 FP16:
VINS_f16 A1 1111 11101 d 110000 vd:4 101011 m 0 vm:4
VMOVX_f16 A1 1111 11101 d 110000 vd:4 101001 m 0 vm:4
# VMOV between general-purpose register and half-precision
VMOV_core_f16 A1 cond:4 1110000 op:1 vn:4 rt:4 1001 n 0010000

@v8_2_fp16_v8_3_compnum
# These are only present if both v8.2-FP16 and v8.3-CompNum are implemented
# sz=0 for FP16
VCADD_f16 A1 1111110 rot:1 1 d:1 0 0 vn:4 vd:4 1000 n:1 q:1 m:1 0 vm:4
VCMLA_f16 A1 1111110 rot:2 d:1 1 0 vn:4 vd:4 1000 n:1 q:1 m:1 0 vm:4
VCMLA_s_f16 A1 11111110 0 d:1 rot:2 vn:4 vd:4 1000 n:1 q:1 m:1 0 vm:4

#
# ARMv8.3 extensions
#
@v8_3_compnum
# We avoid the FP16 parts of this, which are in @v8_2_fp16_v8_3_compnum,
# so here s=1
VCADD A1 1111110 rot:1 1 d:1 0 1 vn:4 vd:4 1000 n:1 q:1 m:1 0 vm:4
VCMLA A1 1111110 rot:2 d:1 1 1 vn:4 vd:4 1000 n:1 q:1 m:1 0 vm:4
VCMLA_s A1 11111110 1 d:1 rot:2 vn:4 vd:4 1000 n:1 q:1 m:1 0 vm:4