###############################################################################
# Copyright (c) 2010 Linaro Limited
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Eclipse Public License v1.0
# which accompanies this distribution, and is available at
# http://www.eclipse.org/legal/epl-v10.html
#
# Contributors:
#     Peter Maydell (Linaro) - initial implementation
###############################################################################

# Input file for risugen defining ARM instructions

# Some random patterns (disabled)
#ADD A1 cond:4 0000 100 s rn:4 rd:4 imm:5 type:2 0 rm:4
#RBIT A1 cond:4 0110 1111 1111 rd:4 1111 0011 rm:4
#VADD A2 cond:4 11100 d 11 vn:4 vd:4 101 sz n 0 m 0 vm:4

# Basic VFP arithmetic. These are here not because we expect
# them to be wrong, but so that we can check they still work
# when we fiddle with the FPSCR.
VADD A2 cond:4 11100 d 11 vn:4 vd:4 101 sz n 0 m 0 vm:4
VSUB A2 cond:4 11100 d 11 vn:4 vd:4 101 sz n 1 m 0 vm:4
VMUL A2 cond:4 11100 d 10 vn:4 vd:4 101 sz n 0 m 0 vm:4
VDIV A1 cond:4 11101 d 00 vn:4 vd:4 101 sz n 0 m 0 vm:4

########### VCVT #########################################
# These patterns should cover all the VCVT* instructions
# in their ARM encodings. NB that the patterns for half
# precision conversions are untested.
##########################################################

# VCVT between fp and int (A8.6.295).
# Two patterns, because opc2 must be 000 or 10x.
VCVT_a A1 cond:4 11101 d 111 000 vd:4 101 sz op 1 m 0 vm:4
VCVT_b A1 cond:4 11101 d 111 10 x vd:4 101 sz op 1 m 0 vm:4

# VCVT between fp and fixed point (A8.6.297)
# Ugh. UNPREDICTABLE unless the value formed by imm4:i is at most
# 16 (if sx is 0) or 32 (if sx is 1). That is, if sx==0 then either
# bit 3 must be 0 or bits 2..0 and 5 must be 0.
# sx==1: every imm4:i value is acceptable
VCVT_c A1 cond:4 11101 d 111 op 1 u vd:4 101 sf 1 1 i 0 imm:4
# sx==0, bit 3 == 0
VCVT_d A1 cond:4 11101 d 111 op 1 u vd:4 101 sf 0 1 i 0 0 imm:3
# sx==0, bit 3 == 1, bits 2..0 and 5 all 0
VCVT_e A1 cond:4 11101 d 111 op 1 u vd:4 101 sf 0 1 0 0 1000

# VCVT fp to integer, neon (A8.6.294)
# Separate Q=0 and Q=1 patterns so that we never generate the
# Q=1 Vd<0>==1 or Vm<0>==1 cases
# (they UNDEF but qemu gets this wrong for just about all neon)
VCVT_neon_q0 A1 1111 0011 1 d 11 10 11 vd:4 0 11 op:2 0 m 0 vm:4
VCVT_neon_q1 A1 1111 0011 1 d 11 10 11 vd:3 0 0 11 op:2 1 m 0 vm:3 0

# VCVT fp to fixed, neon (A8.6.296)
# Again split by Q to avoid the UNDEF case for Q=1 with Vd<0> or Vm<0> set
VCVT_neon_b_q0 A1 1111 001 u 1 d 1 imm:5 vd:4 111 op 0 0 m 1 vm:4
VCVT_neon_b_q1 A1 1111 001 u 1 d 1 imm:5 vd:3 0 111 op 0 1 m 1 vm:3 0

# VCVT between double and single (A8.6.298)
VCVT_298 A1 cond:4 1110 1 d 11 0111 vd:4 101 sz 1 1 m 0 vm:4

# The next three patterns handle conversions to and from
# half-precision (16 bit) floats. A8 doesn't have these; you'll
# need an A9 as the master to use these.
# The sz!=01 UNDEF cases are not generated.
# Two patterns, to avoid the op==1 Vd<0>==1 and op==0 Vm<0>==1 UNDEF cases.
VCVT_299_a A1 1111 0011 1 d 11 01 10 vd:4 011 0 0 0 m 0 vm:3 0
VCVT_299_b A1 1111 0011 1 d 11 01 10 vd:3 0 011 1 0 0 m 0 vm:4
# VCVTB, VCVTT (A8.6.300)
# NOTE(review): "VCTV" looks like a typo for "VCVT"; the name is left
# unchanged in case anything selects patterns by name.
VCTV_B_TT A1 cond:4 1110 1 d 11 001 op vd:4 101 0 t 1 m 0 vm:4

########### VQSHL ########################################
# These patterns should cover all the VQSHL* instructions
# in their ARM encodings.
##########################################################

# VQSHL reg: two patterns to avoid the UNDEF case for
# Q==1 and lsbit of vd/vn/vm!=0
VQSHL_reg_a A1 1111 001 u 0 d sz:2 vn:4 vd:4 0100 n 0 m 1 vm:4
VQSHL_reg_b A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0100 n 1 m 1 vm:3 0

# VQSHLU: U==0 is UNDEF so don't generate it
# Q=1 case: Vd<0> or Vm<0> == 1 => UNDEF, so avoid
# L:imm6 == 0000xxx => some other insn (we use the custom constraint for this).
# $imm is the 6-bit imm6 field, so 0x38 selects imm6<5:3>: the constraint
# accepts the encoding when L is set or any of those three bits is set.
VQSHLU_imm_a A1 1111 001 1 1 d imm:6 vd:3 0 011 0 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
VQSHLU_imm_b A1 1111 001 1 1 d imm:6 vd:4 011 0 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# VQSHL imm: undefs as for VQSHLU except that U==0 is OK
VQSHL_imm_a A1 1111 001 u 1 d imm:6 vd:3 0 011 1 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
VQSHL_imm_b A1 1111 001 u 1 d imm:6 vd:4 011 1 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }

# VRSQRTS
# Q=1: UNDEF if lsbit of vn/vd/vm is 1
VRSQRTS_a A1 1111 0010 0 d 1 0 vn:3 0 vd:3 0 1111 n 1 m 1 vm:3 0
VRSQRTS_b A1 1111 0010 0 d 1 0 vn:4 vd:4 1111 n 0 m 1 vm:4

# various 32x32->64 multiplies
# we omit the v5-and-below constraint that rn must not be rdhi or rdlo;
# the only constraint applied is that rdhi and rdlo differ.
UMAAL A1 cond:4 0000 0100 rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; }
UMLAL A1 cond:4 0000 101 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; }
UMULL A1 cond:4 0000 100 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; }
SMLAL A1 cond:4 0000 111 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; }
SMULL A1 cond:4 0000 110 s rdhi:4 rdlo:4 rm:4 1001 rn:4 { $rdhi != $rdlo; }

# 32x32->64 but result is high word only
SMMLA A1 cond:4 01110101 rd:4 ra:4 rm:4 00 r 1 rn:4
SMMLS A1 cond:4 01110101 rd:4 ra:4 rm:4 11 r 1 rn:4
# Note that this doesn't overlap with SMMLA because of the implicit
# constraints on registers fields (ie not 13 or 15)
SMMUL A1 cond:4 01110101 rd:4 1111 rm:4 00 r 1 rn:4

# dual multiplies
SMLAD A1 cond:4 0111 0000 rd:4 ra:4 rm:4 00 m 1 rn:4
SMUAD A1 cond:4 0111 0000 rd:4 1111 rm:4 00 m 1 rn:4
SMLSD A1 cond:4 0111 0000 rd:4 ra:4 rm:4 01 m 1 rn:4
SMUSD A1 cond:4 0111 0000 rd:4 1111 rm:4 01 m 1 rn:4
SMLALD A1 cond:4 0111 0100 rdhi:4 rdlo:4 rm:4 00 m 1 rn:4 { $rdhi != $rdlo; }
SMLSLD A1 cond:4 0111 0100 rdhi:4 rdlo:4 rm:4 01 m 1 rn:4 { $rdhi != $rdlo; }

# Saturate instructions
USAT A1 cond:4 0110111 satimm:5 rd:4 imm:5 sh 0 1 rn:4
SSAT A1 cond:4 0110101 satimm:5 rd:4 imm:5 sh 0 1 rn:4
SSAT16 A1 cond:4 01101010 satimm:4 rd:4 1111 0011 rn:4
USAT16 A1 cond:4 01101110 satimm:4 rd:4 1111 0011 rn:4

# VMLAL, VMLSL, VQDMLAL, VQDMLSL, VMULL, VQDMULL
# NB that enc A1 is actually VMLA/VMLS only, A2 is VMLAL/VMLSL only
VMLAL A2 1111 001 u 1 d sz:2 vn:4 vd:3 0 10 op 0 n 0 m 0 vm:4 { $sz != 3; }
# VQDMLAL and VQDMLSL (not scalar form)
# NOTE(review): "VQDLAL" looks like a typo for "VQDMLAL"; the name is left
# unchanged in case anything selects patterns by name.
VQDLAL A1 1111 0010 1 d sz:2 vn:4 vd:3 0 10 op 1 n 0 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# VMULL (excludes the polynomial case!)
VMULL A2 1111 001 u 1 d sz:2 vn:4 vd:3 0 11 0 0 n 0 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# VQDMULL (not scalar form)
VQDMULL A1 1111 0010 1 d sz:2 vn:4 vd:3 0 1101 n 0 m 0 vm:4 { ($sz != 3) && ($sz != 0); }

# Scalar forms, VMLAL, VMLSL, VQDMLAL, VQDMLSL, VMULL, VQDMULL
# VMLAL/VMLSL scalar
VMLAL_scalar A2 1111 001 u 1 d sz:2 vn:4 vd:3 0 0 op 1 0 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# VQDMLAL/VQDMLSL scalar
VQDMLAL_scalar A2 1111 0010 1 d sz:2 vn:4 vd:3 0 0 op 11 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# VMULL scalar
VMULL_scalar A2 1111 001 u 1 d sz:2 vn:4 vd:3 0 1010 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# VQDMULL scalar
VQDMULL_scalar A2 1111 0010 1 d sz:2 vn:4 vd:3 0 1011 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }

# Polynomial multiply
# A1: op == 1 (need size == 0 for not UNDEF); q = 0 case
VMULL_poly_a A1 1111 001 1 0 d 00 vn:4 vd:4 1001 n 0 m 1 vm:4
# q = 1 case
VMULL_poly_b A1 1111 001 1 0 d 00 vn:3 0 vd:3 0 1001 n 1 m 1 vm:3 0
# A2: op == 1 (need U == 0, size == 0 for not UNDEF)
VMULL_poly A2 1111 001 0 1 d 00 vn:4 vd:3 0 11 1 0 n 0 m 0 vm:4

# Neon saturating add/sub
# VQADD VQSUB
# Q=1 case:
VQADD_a A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0000 n 1 m 1 vm:3 0
# Q=0:
VQADD_b A1 1111 001 u 0 d sz:2 vn:4 vd:4 0000 n 0 m 1 vm:4
# VQSUB
VQSUB_a A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0010 n 1 m 1 vm:3 0
VQSUB_b A1 1111 001 u 0 d sz:2 vn:4 vd:4 0010 n 0 m 1 vm:4

# VQ(R)DMULH: vector saturating (rounding) doubling multiply returning high half
# Q=1 case:
VQDMULH_a A1 1111 0010 0 d sz:2 vn:3 0 vd:3 0 1011 n 1 m 0 vm:3 0 { ($sz != 3) && ($sz != 0); }
# Q=0:
VQDMULH_b A1 1111 0010 0 d sz:2 vn:4 vd:4 1011 n 0 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# scalar form Q=1
VQDMULH_a A2 1111 001 1 1 d sz:2 vn:3 0 vd:3 0 1100 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# scalar, Q=0
VQDMULH_b A2 1111 001 0 1 d sz:2 vn:4 vd:4 1100 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }

# VQRDMULH, Q=1 case:
VQRDMULH_a A1 1111 0011 0 d sz:2 vn:3 0 vd:3 0 1011 n 1 m 0 vm:3 0 { ($sz != 3) && ($sz != 0); }
# Q=0:
VQRDMULH_b A1 1111 0011 0 d sz:2 vn:4 vd:4 1011 n 0 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# scalar form Q=1. Bits 11:8 are 1101 here, as in the Q=0 scalar pattern
# below; 1100 would be the VQDMULH scalar encoding.
VQRDMULH_a A2 1111 001 1 1 d sz:2 vn:3 0 vd:3 0 1101 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }
# scalar, Q=0
VQRDMULH_b A2 1111 001 0 1 d sz:2 vn:4 vd:4 1101 n 1 m 0 vm:4 { ($sz != 3) && ($sz != 0); }

# various preload and hint instructions
# see table A5-24 for this unallocated hint insn block (must NOP on v7MP)
UNALLOC_HINT A1 11110 100 x 001 anything:20
UNALLOC_HINT_b A1 11110 110 x 001 anything:15 0 any:4
PLI_imm A1 1111 0100 u 101 rn:4 1111 imm:12
PLI_reg A1 1111 0110 u 101 rn:4 1111 imm:5 type:2 0 rm:4
PLD_imm A1 1111 0101 u 101 rn:4 1111 imm:12
PLD_reg A1 1111 0111 u 101 rn:4 1111 imm:5 type:2 0 rm:4
PLDW_imm A1 1111 0101 u 001 rn:4 1111 imm:12
PLDW_reg A1 1111 0111 u 001 rn:4 1111 imm:5 type:2 0 rm:4
# no overlap with PLD_imm because rn can't be 15
PLD_lit A1 1111 0101 u 101 1111 1111 imm:12

# Shift-by-immediate patterns below share one constraint:
# L:imm6 == 0000xxx is some other encoding, so require L set or
# imm6<5:3> (mask 0x38 on the 6-bit imm field) non-zero.

# VSRA
# Q=0
VSRA_a A1 1111 001 u 1 d imm:6 vd:4 0001 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# Q=1
VSRA_b A1 1111 001 u 1 d imm:6 vd:3 0 0001 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
# VSLI
# Q=0
VSLI_a A1 1111 0011 1 d imm:6 vd:4 0101 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# Q=1
VSLI_b A1 1111 0011 1 d imm:6 vd:3 0 0101 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
# VSRI
# Q=0
VSRI_a A1 1111 0011 1 d imm:6 vd:4 0100 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# Q=1
VSRI_b A1 1111 0011 1 d imm:6 vd:3 0 0100 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }

# Various shifts
# Q=0
VQRSHL_a A1 1111 001 u 0 d sz:2 vn:4 vd:4 0101 n 0 m 1 vm:4
# Q=1
VQRSHL_b A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0101 n 1 m 1 vm:3 0
# VQRSHRN, VQRSHRUN, VRSHRN (VRSHRN is the U=0 op=0 case)
# No L bit in this pattern, so just require imm6<5:3> != 000
VQRSHRN A1 1111 001 u 1 d imm:6 vd:4 100 op 0 1 m 1 vm:3 0 { (($imm & 0x38) != 0); }
# Q=0
VQSHL_a A1 1111 001 u 0 d sz:2 vn:4 vd:4 0100 n 0 m 1 vm:4
# Q=1
VQSHL_b A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0100 n 1 m 1 vm:3 0
# VQSHRN, VQSHRUN, VSHRN (VSHRN is the U=0 op=0 case)
VQSHRN A1 1111 001 u 1 d imm:6 vd:4 100 op 0 0 m 1 vm:3 0 { (($imm & 0x38) != 0); }
# q=0
VRSHL_a A1 1111 001 u 0 d sz:2 vn:4 vd:4 0101 n 0 m 0 vm:4
# q=1
VRSHL_b A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0101 n 1 m 0 vm:3 0
# VRSHR; q=0
VRSHR_a A1 1111 001 u 1 d imm:6 vd:4 0010 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# q=1
VRSHR_b A1 1111 001 u 1 d imm:6 vd:3 0 0010 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
# VRSRA; q=0
VRSRA_a A1 1111 001 u 1 d imm:6 vd:4 0011 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# q=1
VRSRA_b A1 1111 001 u 1 d imm:6 vd:3 0 0011 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
# VSHL (imm); q = 0
VSHL_a A1 1111 0010 1 d imm:6 vd:4 0101 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# q=1
VSHL_b A1 1111 0010 1 d imm:6 vd:3 0 0101 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }
# VSHL (reg): q=0
VSHL_c A1 1111 001 u 0 d sz:2 vn:4 vd:4 0100 n 0 m 0 vm:4
# q=1
VSHL_d A1 1111 001 u 0 d sz:2 vn:3 0 vd:3 0 0100 n 1 m 0 vm:3 0
# This includes VMOVL (when shift is 0)
VSHLL A1 1111 001 u 1 d imm:6 vd:3 0 1010 0 0 m 1 vm:4 { (($imm & 0x38) != 0); }
VSHLL A2 1111 0011 1 d 11 sz:2 10 vd:3 0 0011 00 m 0 vm:4 { ($sz != 3); }
# VSHR (q=0)
VSHR_a A1 1111 001 u 1 d imm:6 vd:4 0000 l 0 m 1 vm:4 { ($l == 1) || (($imm & 0x38) != 0); }
# q=1
VSHR_b A1 1111 001 u 1 d imm:6 vd:3 0 0000 l 1 m 1 vm:3 0 { ($l == 1) || (($imm & 0x38) != 0); }

# VQMOVN, VQMOVUN
# includes VMOVN when op=00
VQMOVN A1 1111 0011 1 d 11 sz:2 10 vd:4 0010 op:2 m 0 vm:3 0 { ($sz != 3); }

# VUZP : Q=0 case (sz 11 or 10 undefs; d == m is UNKNOWN results)
VUZP_a A1 1111 0011 1 d 11 0 sz 10 vd:4 0001 0 0 m 0 vm:4 { ($d != $m) || ($vd != $vm); }
# Q=1 case (sz 11, vd<0> or vm<0> 1 undefs; d == m is UNKNOWN results)
VUZP_b A1 1111 0011 1 d 11 sz:2 10 vd:3 0 0001 0 1 m 0 vm:3 0 { ($sz != 3) && (($d != $m) || ($vd != $vm)); }
# VZIP : Q=0 case (sz 11 or 10 undefs; d == m is UNKNOWN results)
VZIP_a A1 1111 0011 1 d 11 0 sz 10 vd:4 0001 1 0 m 0 vm:4 { ($d != $m) || ($vd != $vm); }
# Q=1 case (sz 11, vd<0> or vm<0> 1 undefs; d == m is UNKNOWN results)
VZIP_b A1 1111 0011 1 d 11 sz:2 10 vd:3 0 0001 1 1 m 0 vm:3 0 { ($sz != 3) && (($d != $m) || ($vd != $vm)); }

# VRECPE
# Q=0
VRECPE_a A1 1111 0011 1 d 11 10 11 vd:4 010 f 0 0 m 0 vm:4
# Q=1
VRECPE_b A1 1111 0011 1 d 11 10 11 vd:3 0 010 f 0 1 m 0 vm:3 0
# VRSQRTE
# Q=0
VRSQRTE_a A1 1111 0011 1 d 11 10 11 vd:4 010 f 1 0 m 0 vm:4
# Q=1
VRSQRTE_b A1 1111 0011 1 d 11 10 11 vd:3 0 010 f 1 1 m 0 vm:3 0

# Unsigned saturating add/subtract
# UQADD16, UQSUB16, UQADD8, UQSUB8
UQADD16 A1 cond:4 01100110 rn:4 rd:4 1111 0001 rm:4
UQADD8 A1 cond:4 01100110 rn:4 rd:4 1111 1001 rm:4
UQSUB16 A1 cond:4 01100110 rn:4 rd:4 1111 0111 rm:4
UQSUB8 A1 cond:4 01100110 rn:4 rd:4 1111 1111 rm:4
# Signed ditto
QADD16 A1 cond:4 01100010 rn:4 rd:4 1111 0001 rm:4
QADD8 A1 cond:4 01100010 rn:4 rd:4 1111 1001 rm:4
QSUB16 A1 cond:4 01100010 rn:4 rd:4 1111 0111 rm:4
QSUB8 A1 cond:4 01100010 rn:4 rd:4 1111 1111 rm:4
# Signed parallel add/subtract
SADD8 A1 cond:4 0110 0001 rn:4 rd:4 1111 1001 rm:4
SADD16 A1 cond:4 0110 0001 rn:4 rd:4 1111 0001 rm:4
SSUB8 A1 cond:4 0110 0001 rn:4 rd:4 1111 1111 rm:4
SSUB16 A1 cond:4 0110 0001 rn:4 rd:4 1111 0111 rm:4
# unsigned ditto
UADD8 A1 cond:4 0110 0101 rn:4 rd:4 1111 1001 rm:4
UADD16 A1 cond:4 0110 0101 rn:4 rd:4 1111 0001 rm:4
USUB8 A1 cond:4 0110 0101 rn:4 rd:4 1111 1111 rm:4
USUB16 A1 cond:4 0110 0101 rn:4 rd:4 1111 0111 rm:4

SASX A1 cond:4 0110 0001 rn:4 rd:4 1111 0011 rm:4
SSAX A1 cond:4 0110 0001 rn:4 rd:4 1111 0101 rm:4

# vector duplicate (scalar)
# Q=1 case
VDUP_scalar A1a 1111 0011 1 d 11 imm:4 vd:3 0 110 00 1 m 0 vm:4 { ($imm & 7) != 0; }
# Q=0 case
VDUP_scalar A1b 1111 0011 1 d 11 imm:4 vd:4 110 00 0 m 0 vm:4 { ($imm & 7) != 0; }

# vector duplicate (reg)
# b:e == 11 UNDEF
VDUP A1a cond:4 1110 1 b 1 0 vd:3 0 rt:4 1011 d 0 e 1 0000 { ($b == 0) || ($e == 0); }
VDUP A1b cond:4 1110 1 b 0 0 vd:4 rt:4 1011 d 0 e 1 0000 { ($b == 0) || ($e == 0); }

########### Neon float ops ###############################
# These patterns cover the Neon instructions which handle
# floating-point data (but not the versions of the insns
# which do integer data, or the VFP versions).
##########################################################

# Neon float ops. Each instruction gets a Q=0 pattern (A1a/A2a) and a
# Q=1 pattern (A1b/A2b); the Q=1 patterns fix the low bit of vn/vd/vm to 0.

# VMAX, VMIN Q=0
VMAXMIN_fp A1a 1111 0010 0 d op 0 vn:4 vd:4 1111 n 0 m 0 vm:4
# Q=1
VMAXMIN_fp A1b 1111 0010 0 d op 0 vn:3 0 vd:3 0 1111 n 1 m 0 vm:3 0

# VABD Q=0
VABD_fp A1a 1111 0011 0 d 1 0 vn:4 vd:4 1101 n 0 m 0 vm:4
# Q=1
VABD_fp A1b 1111 0011 0 d 1 0 vn:3 0 vd:3 0 1101 n 1 m 0 vm:3 0

# VADD Q=0, Q=1
VADD A1a 1111 0010 0 d 0 0 vn:4 vd:4 1101 n 0 m 0 vm:4
VADD A1b 1111 0010 0 d 0 0 vn:3 0 vd:3 0 1101 n 1 m 0 vm:3 0

# VSUB
VSUB A1a 1111 0010 0 d 1 0 vn:4 vd:4 1101 n 0 m 0 vm:4
VSUB A1b 1111 0010 0 d 1 0 vn:3 0 vd:3 0 1101 n 1 m 0 vm:3 0

# VMUL
VMUL A1a 1111 0011 0 d 0 0 vn:4 vd:4 1101 n 0 m 1 vm:4
VMUL A1b 1111 0011 0 d 0 0 vn:3 0 vd:3 0 1101 n 1 m 1 vm:3 0

# VCEQ, VCGE, VCGT
VCEQ A2a 1111 0010 0 d 0 0 vn:4 vd:4 1110 n 0 m 0 vm:4
VCEQ A2b 1111 0010 0 d 0 0 vn:3 0 vd:3 0 1110 n 1 m 0 vm:3 0
VCGE A2a 1111 0011 0 d 0 0 vn:4 vd:4 1110 n 0 m 0 vm:4
VCGE A2b 1111 0011 0 d 0 0 vn:3 0 vd:3 0 1110 n 1 m 0 vm:3 0
VCGT A2a 1111 0011 0 d 1 0 vn:4 vd:4 1110 n 0 m 0 vm:4
VCGT A2b 1111 0011 0 d 1 0 vn:3 0 vd:3 0 1110 n 1 m 0 vm:3 0

# VCEQ, VCGE, VCGT, VCLT, VCLE with imm 0 -- F=1 forms only!
# Compare-against-zero patterns: A1a is the Q=0 form, A1b the Q=1 form
# (low bit of vd and vm fixed to 0).
VCEQ0 A1a 1111 0011 1 d 11 10 0 1 vd:4 0 1 010 0 m 0 vm:4
VCEQ0 A1b 1111 0011 1 d 11 10 0 1 vd:3 0 0 1 010 1 m 0 vm:3 0
VCGE0 A1a 1111 0011 1 d 11 10 0 1 vd:4 0 1 001 0 m 0 vm:4
VCGE0 A1b 1111 0011 1 d 11 10 0 1 vd:3 0 0 1 001 1 m 0 vm:3 0
VCGT0 A1a 1111 0011 1 d 11 10 0 1 vd:4 0 1 000 0 m 0 vm:4
VCGT0 A1b 1111 0011 1 d 11 10 0 1 vd:3 0 0 1 000 1 m 0 vm:3 0
VCLE0 A1a 1111 0011 1 d 11 10 0 1 vd:4 0 1 011 0 m 0 vm:4
VCLE0 A1b 1111 0011 1 d 11 10 0 1 vd:3 0 0 1 011 1 m 0 vm:3 0
VCLT0 A1a 1111 0011 1 d 11 10 0 1 vd:4 0 1 100 0 m 0 vm:4
VCLT0 A1b 1111 0011 1 d 11 10 0 1 vd:3 0 0 1 100 1 m 0 vm:3 0

# VACGE, VACGT
VACG A1a 1111 0011 0 d op 0 vn:4 vd:4 1110 n 0 m 1 vm:4
VACG A1b 1111 0011 0 d op 0 vn:3 0 vd:3 0 1110 n 1 m 1 vm:3 0

# VRECPS: Q=0, Q=1 cases
VRECPS A1a 1111 0010 0 d 0 0 vn:4 vd:4 1111 n 0 m 1 vm:4
VRECPS A1b 1111 0010 0 d 0 0 vn:3 0 vd:3 0 1111 n 1 m 1 vm:3 0

########### Neon loads and stores #########################
# This set of patterns isn't complete yet...
##########################################################

# VLD*, single element to all lanes
# All addressing modes (reg, reg postindex reg, reg postindex eltsz)
# Note use of 'xm' for 'rm' to avoid the implicit "not 13 or 15"
# constraint -- 13 and 15 encode the other two addr modes so are OK here.
# The constraints are avoiding: d+regs > 32 (UNPREDICTABLE);
# also the UNDEF sz/a combinations; and the risugen restriction
# that the two regs in reg postindex reg must be different.
# Max alignment requirement for VLD1 is 4 bytes.
VLD1_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 00 sz:2 t a xm:4 \
 !constraints { ($d == 0 || $t == 0 || $vd != 0xf) && $sz != 3 && ($sz != 0 || $a != 1) && ($rn != $xm); } \
 !memory { reg($rn); }
# As usual we need to separate out the UNDEF cases as they
# must not have !memory blocks
# sz 11: UNDEF
VLD1_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 00 11 t a rm:4
# sz 00, a 1 : UNDEF
VLD1_stoa A1c 1111 0100 1 d 10 rn:4 vd:4 11 00 00 t 1 rm:4

# VLD2: d+t+1 > 31 is unpredictable
VLD2_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 01 sz:2 t a xm:4 \
 !constraints { (((($d << 4)|$vd) + $t + 1) < 32) && $sz != 3 && ($rn != $xm); } \
 !memory { align(8); reg($rn); }
# UNDEF case : sz 11
VLD2_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 01 11 t a xm:4

# VLD3: d+(t+1)*2 > 31 is unpredictable
# Bits 11:8 are 11 10 for VLD3 (as in the UNDEF patterns below);
# 11 01 is the VLD2 encoding.
VLD3_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 10 sz:2 t 0 xm:4 \
 !constraints { (((($d << 4)|$vd) + ($t + 1)*2) < 32) && $sz != 3 && ($rn != $xm); } \
 !memory { reg($rn); }
# UNDEF case : sz 11 or a 1
VLD3_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 10 11 t a xm:4
VLD3_stoa A1c 1111 0100 1 d 10 rn:4 vd:4 11 10 sz:2 t 1 xm:4

# VLD4: d+(t+1)*3 > 31 is unpredictable
VLD4_stoa A1a 1111 0100 1 d 10 rn:4 vd:4 11 11 sz:2 t a xm:4 \
 !constraints { (((($d << 4)|$vd) + ($t + 1)*3) < 32) && ($sz != 3 || $a != 0) && ($rn != $xm); } \
 !memory { align(16); reg($rn); }
# UNDEF case : sz 11 and a 0
VLD4_stoa A1b 1111 0100 1 d 10 rn:4 vd:4 11 11 11 t 0 xm:4

# VLD*, single element to one lane. We split the sz cases out
# for convenience of filtering the UNDEF cases and the VLD1-to-all-lanes
# sz == 00
VLD1_s A1a 1111 0100 1 d 10 rn:4 vd:4 00 00 idx:3 0 xm:4 \
 !constraints { ($rn != $xm); } \
 !memory { reg($rn); }
# sz == 01
VLD1_s A1b 1111 0100 1 d 10 rn:4 vd:4 01 00 idx:2 0 idx0 xm:4 \
 !constraints { ($rn != $xm); } \
 !memory { reg($rn); }
# sz == 10
VLD1_s A1c 1111 0100 1 d 10 rn:4 vd:4 10 00 idx3 0 idx:2 xm:4 \
 !constraints { ($rn != $xm) && ($idx == 0 || $idx == 3); } \
 !memory { reg($rn); }
# UNDEF cases: bad index fields for each size
VLD1_s A1d 1111 0100 1 d 10 rn:4 vd:4 00 00 idx:3 1 xm:4
VLD1_s A1e 1111 0100 1 d 10 rn:4 vd:4 01 00 idx:2 1 idx0 xm:4
# sz == 10: UNDEF if bit 2 of the index field is set, or if bits 1:0
# are 01 or 10 (the complement of what A1c generates)
VLD1_s A1f 1111 0100 1 d 10 rn:4 vd:4 10 00 idx:4 xm:4 \
 !constraints { ($idx & 4) != 0 || ($idx & 3) == 1 || ($idx & 3) == 2; }

# VLD2 has an UNPREDICTABLE case for d+inc > 31.
# sz == 00, 01 (no UNDEF cases)
VLD2_s A1a 1111 0100 1 d 10 rn:4 vd:4 0 sz 01 idx:4 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + (($idx >> $sz) & 1)) < 32; } \
 !memory { reg($rn); }
# sz == 10
VLD2_s A1b 1111 0100 1 d 10 rn:4 vd:4 10 01 idx:2 0 idx0 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + ($idx & 1)) < 32; } \
 !memory { align(8); reg($rn); }
# only UNDEF case is sz=10, idx<1>=1
VLD2_s A1c 1111 0100 1 d 10 rn:4 vd:4 10 01 idx:2 1 idx0 xm:4

# VLD3
# UNPREDICTABLE here is for d+inc+inc > 31
# sz == 00, 01
VLD3_s A1a 1111 0100 1 d 10 rn:4 vd:4 0 sz 10 idx:3 0 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ((($idx << 1) >> $sz) & 1))) < 32; } \
 !memory { reg($rn); }
# sz == 10
VLD3_s A1b 1111 0100 1 d 10 rn:4 vd:4 10 10 idx:2 00 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ($idx & 1))) < 32; } \
 !memory { reg($rn); }
# UNDEF: sz == 00, 01, idx<0> != 0
VLD3_s A1c 1111 0100 1 d 10 rn:4 vd:4 0 sz 10 idx:3 1 xm:4
# UNDEF: sz == 10, idx<1:0> != 00
VLD3_s A1d 1111 0100 1 d 10 rn:4 vd:4 10 10 idx:4 xm:4 \
 !constraints { ($idx & 3) != 0; }

# VLD4 has an UNPREDICTABLE case for d+3*inc > 31.
# sz == 00, 01 (no UNDEF cases)
VLD4_s A1a 1111 0100 1 d 10 rn:4 vd:4 0 sz 11 idx:4 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + (($idx & (1 << $sz)) >> 1))) < 32; } \
 !memory { align(8); reg($rn); }
# sz == 10 (bits 9:8 are 11 for VLD4, as in A1a; 01 is the VLD2 encoding)
VLD4_s A1b 1111 0100 1 d 10 rn:4 vd:4 10 11 idx:2 0 idx0 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + ($idx & 1))) < 32; } \
 !memory { align(16); reg($rn); }
# only UNDEF case is sz=10, idx<1:0>==11
VLD4_s A1c 1111 0100 1 d 10 rn:4 vd:4 10 11 idx:2 11 xm:4

# VST* single element from one lane
# These are actually identical to the VLD* patterns except that
# bit 21 is clear to indicate store rather than load.
# sz == 00
VST1_s A1a 1111 0100 1 d 00 rn:4 vd:4 00 00 idx:3 0 xm:4 \
 !constraints { ($rn != $xm); } \
 !memory { reg($rn); }
# sz == 01
VST1_s A1b 1111 0100 1 d 00 rn:4 vd:4 01 00 idx:2 0 idx0 xm:4 \
 !constraints { ($rn != $xm); } \
 !memory { reg($rn); }
# sz == 10
VST1_s A1c 1111 0100 1 d 00 rn:4 vd:4 10 00 idx3 0 idx:2 xm:4 \
 !constraints { ($rn != $xm) && ($idx == 0 || $idx == 3); } \
 !memory { reg($rn); }
# UNDEF cases: bad index fields for each size
VST1_s A1d 1111 0100 1 d 00 rn:4 vd:4 00 00 idx:3 1 xm:4
VST1_s A1e 1111 0100 1 d 00 rn:4 vd:4 01 00 idx:2 1 idx0 xm:4
# sz == 10: UNDEF if bit 2 of the index field is set, or if bits 1:0
# are 01 or 10 (the complement of what A1c generates)
VST1_s A1f 1111 0100 1 d 00 rn:4 vd:4 10 00 idx:4 xm:4 \
 !constraints { ($idx & 4) != 0 || ($idx & 3) == 1 || ($idx & 3) == 2; }

# VST2
# sz == 00, 01 (no UNDEF cases)
VST2_s A1a 1111 0100 1 d 00 rn:4 vd:4 0 sz 01 idx:4 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + (($idx >> $sz) & 1)) < 32; } \
 !memory { reg($rn); }
# sz == 10
VST2_s A1b 1111 0100 1 d 00 rn:4 vd:4 10 01 idx:2 0 idx0 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 1 + ($idx & 1)) < 32; } \
 !memory { align(8); reg($rn); }
# only UNDEF case is sz=10, idx<1>=1
VST2_s A1c 1111 0100 1 d 00 rn:4 vd:4 10 01 idx:2 1 idx0 xm:4

# VST3
# UNPREDICTABLE here is for d+inc+inc > 31
# sz == 00, 01
VST3_s A1a 1111 0100 1 d 00 rn:4 vd:4 0 sz 10 idx:3 0 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ((($idx << 1) >> $sz) & 1))) < 32; } \
 !memory { reg($rn); }
# sz == 10
VST3_s A1b 1111 0100 1 d 00 rn:4 vd:4 10 10 idx:2 00 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 2 * (1 + ($idx & 1))) < 32; } \
 !memory { reg($rn); }
# UNDEF: sz == 00, 01, idx<0> != 0
VST3_s A1c 1111 0100 1 d 00 rn:4 vd:4 0 sz 10 idx:3 1 xm:4
# UNDEF: sz == 10, idx<1:0> != 00
VST3_s A1d 1111 0100 1 d 00 rn:4 vd:4 10 10 idx:4 xm:4 \
 !constraints { ($idx & 3) != 0; }

# VST4 has an UNPREDICTABLE case for d+3*inc > 31.
# sz == 00, 01 (no UNDEF cases)
VST4_s A1a 1111 0100 1 d 00 rn:4 vd:4 0 sz 11 idx:4 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + (($idx & (1 << $sz)) >> 1))) < 32; } \
 !memory { align(8); reg($rn); }
# sz == 10 (bits 9:8 are 11 for VST4, as in A1a; 01 is the VST2 encoding)
VST4_s A1b 1111 0100 1 d 00 rn:4 vd:4 10 11 idx:2 0 idx0 xm:4 \
 !constraints { ($rn != $xm) && ((($d << 4)|$vd) + 3 * (1 + ($idx & 1))) < 32; } \
 !memory { align(16); reg($rn); }
# only UNDEF case is sz=10, idx<1:0>==11
VST4_s A1c 1111 0100 1 d 00 rn:4 vd:4 10 11 idx:2 11 xm:4