aboutsummaryrefslogtreecommitdiff
path: root/risugen
blob: 2b9ee6888a9ff3c24fd6aba04569d3e9d5ada018 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
#!/usr/bin/perl -w
###############################################################################
# Copyright (c) 2010 Linaro Limited
# All rights reserved. This program and the accompanying materials
# are made available under the terms of the Eclipse Public License v1.0
# which accompanies this distribution, and is available at
# http://www.eclipse.org/legal/epl-v10.html
#
# Contributors:
#     Peter Maydell (Linaro) - initial implementation
###############################################################################

# risugen -- generate a test binary file for use with risu
# See 'risugen --help' for usage information.

use strict;
use Getopt::Long;
use Data::Dumper;
use Text::Balanced qw { extract_bracketed extract_multiple };

my @insns;
my %insn_details;

my $is_thumb = 0;

my @pattern_re = ();

my $bytecount;

# An instruction pattern as parsed from the config file turns into
# a record like this:
#   name          # name of the pattern
#   width         # 16 or 32
#   fixedbits     # values of the fixed bits
#   fixedbitmask  # 1s indicate locations of the fixed bits
#   blocks        # hash of blockname->contents (for constraints etc)
#   fields        # array of arrays, each element is [ varname, bitpos, bitmask ]
#
# We store these in the insn_details hash.

# Valid block names (keys in blocks hash)
my %valid_blockname = ( constraints => 1, memory => 1 );

sub open_bin
{
    my ($fname) = @_;
    open(BIN, ">", $fname) or die "can't open %fname: $!";
    $bytecount = 0;
}

sub close_bin
{
    close(BIN) or die "can't close output file: $!";
}

sub insn32($)
{
    my ($insn) = @_;
    print BIN pack("V", $insn);
    $bytecount += 4;
}

sub insn16($)
{
    my ($insn) = @_;
    print BIN pack("v", $insn);
    $bytecount += 2;
}

sub align4()
{
    if ($bytecount & 3) {
        insn16(0xbf00);  # NOP
    }
}

# The space 0xE7F___F_ is guaranteed to always UNDEF
# and not to be allocated for insns in future architecture
# revisions. So we use it for our 'do comparison' and
# 'end of test' instructions.
# We fill in the middle bit with a randomly selected
# 'e5a' just in case the space is being used by somebody
# else too.

# For Thumb the equivalent space is 0xDExx
# and we use 0xDEEx.

# So the last nibble indicates the desired operation:
my $OP_COMPARE = 0;        # compare registers
my $OP_TESTEND = 1;        # end of test, stop
my $OP_SETMEMBLOCK = 2;    # r0 is address of memory block (8192 bytes)
my $OP_GETMEMBLOCK = 3;    # add the address of memory block to r0
my $OP_COMPAREMEM = 4;     # compare memory block

sub write_thumb_risuop($)
{
    my ($op) = @_;
    insn16(0xdee0 | $op);
}

sub write_arm_risuop($)
{
    my ($op) = @_;
    insn32(0xe7fe5af0 | $op);
}

sub write_risuop($)
{
    my ($op) = @_;
    if ($is_thumb) {
        write_thumb_risuop($op);
    } else {
        write_arm_risuop($op);
    }
}

sub write_switch_to_thumb()
{
    # Note that we have to clean up r0 afterwards
    # so it isn't tainted with a value which depends
    # on PC (and which might differ between hw and
    # qemu/valgrind/etc)
    insn32(0xe28f0001); # add r0, pc, #1
    insn32(0xe12fff10); # bx r0
    insn16(0x4040);     # eor r0,r0 (enc T1)
}

sub write_switch_to_arm()
{
    # Switch to ARM mode.
    align4();
    insn16(0x4778);  # bx pc
    insn16(0xbf00);  # nop
}

sub write_sub_rrr($$$)
{
    my ($rd, $rn, $rm) = @_;
    if ($is_thumb) {
        # enc T2
        insn16(0xeba0 | $rn);
        insn16(0x0000 | ($rd << 8) | $rm);
    } else {
        # enc A1
        insn32(0xe0400000 | ($rn << 16) | ($rd << 12) | $rm);
    }
}

# valid shift types
my $SHIFT_LSL = 0;
my $SHIFT_LSR = 1;
my $SHIFT_ASR = 2;
my $SHIFT_ROR = 3;

sub write_sub_rrrs($$$$$)
{
    # sub rd, rn, rm, shifted
    my ($rd, $rn, $rm, $type, $imm) = @_;
    $type = $SHIFT_LSL if $imm == 0;
    if ($imm == 32 && ($type == $SHIFT_LSR || $type == $SHIFT_ASR)) {
        $imm = 0;
    }
    die "write_sub_rrrs: bad shift immediate $imm\n" if $imm < 0 || $imm > 31;
    if ($is_thumb) {
        # enc T2
        my ($imm3, $imm2) = ($imm >> 2, $imm & 3);
        insn16(0xeba0 | $rn);
        insn16(($imm3 << 12) | ($rd << 8) | ($imm2 << 6) | ($type << 4) | $rm);
    } else {
        # enc A1
        insn32(0xe0400000 | ($rn << 16) | ($rd << 12) | ($imm << 7) | ($type << 5) | $rm);
    }
}

sub write_mov_rr($$)
{
    my ($rd, $rm) = @_;
    if ($is_thumb) {
        # enc T3
        insn16(0xea4f);
        insn16(($rd << 8) | $rm);
    } else {
        # enc A1
        insn32(0xe1a00000 | ($rd << 12) | $rm);
    }
}

sub write_mov_ri16($$$)
{
    # Write 16 bits of immediate to register, using either MOVW or MOVT
    my ($rd, $imm, $is_movt) = @_;
    die "write_mov_ri16: immediate $imm out of range\n" if (($imm & 0xffff0000) != 0);
    if ($is_thumb) {
        # enc T3
        my ($imm4, $i, $imm3, $imm8) = (($imm & 0xf000) >> 12,
                                        ($imm & 0x0800) >> 11,
                                        ($imm & 0x0700) >> 8,
                                        ($imm & 0x00ff));
        insn16(0xf240 | ($is_movt << 7) | ($i << 10) | $imm4);
        insn16(($imm3 << 12) | ($rd << 8) | $imm8);
    } else {
        # enc A2
        my ($imm4, $imm12) = (($imm & 0xf000) >> 12,
                              ($imm & 0x0fff));
        insn32(0xe3000000 | ($is_movt << 22) | ($imm4 << 16) | ($rd << 12) | $imm12);
    }
}

sub write_mov_ri($$)
{
    # We always use a MOVW/MOVT pair, for simplicity
    my ($rd, $imm) = @_;
    write_mov_ri16($rd, ($imm & 0xffff), 0);
    my $highhalf = ($imm >> 16) & 0xffff;
    write_mov_ri16($rd, $highhalf, 1) if $highhalf;
}

sub write_random_double_fpreg()
{
    my ($low, $high);
    my $r = rand(100);
    if ($r < 5) {
        # +-0 (5%)
        $low = $high = 0;
        $high |= 0x80000000 if (rand() < 0.5);
    } elsif ($r < 10) {
        # NaN (5%)
        # (plus a tiny chance of generating +-Inf)
        $low = rand(0xffffffff);
        $high = rand(0xffffffff) | 0x7ff00000;
    } elsif ($r < 15) {
        # Infinity (5%)
        $low = 0;
        $high = 0x7ff00000;
        $high |= 0x80000000 if (rand() < 0.5);
    } elsif ($r < 30) {
        # Denormalized number (15%)
        # (plus tiny chance of +-0)
        $low = rand(0xffffffff);
        $high = rand(0xffffffff) & ~0x7ff00000;
    } else {
        # Normalized number (70%)
        # (plus a small chance of the other cases)
        $low = rand(0xffffffff);
        $high = rand(0xffffffff);
    }
    insn32($low);
    insn32($high);
}

sub write_random_single_fpreg()
{
    my ($value);
    my $r = rand(100);
    if ($r < 5) {
        # +-0 (5%)
        $value = 0;
        $value |= 0x80000000 if (rand() < 0.5);
    } elsif ($r < 10) {
        # NaN (5%)
        # (plus a tiny chance of generating +-Inf)
        $value = rand(0xffffffff) | 0x7f800000;
    } elsif ($r < 15) {
        # Infinity (5%)
        $value = 0x7f800000;
        $value |= 0x80000000 if (rand() < 0.5);
    } elsif ($r < 30) {
        # Denormalized number (15%)
        # (plus tiny chance of +-0)
        $value = rand(0xffffffff) & ~0x7f800000;
    } else {
        # Normalized number (70%)
        # (plus a small chance of the other cases)
        $value = rand(0xffffffff);
    }
    insn32($value);
}

sub write_random_fpreg()
{
    # Write out 64 bits of random data intended to
    # initialise an FP register.
    # We tweak the "randomness" here to increase the
    # chances of picking interesting values like
    # NaN, -0.0, and so on, which would be unlikely
    # to occur if we simply picked 64 random bits.
    if (rand() < 0.5) {
        write_random_double_fpreg();
    } else {
        write_random_single_fpreg();
        write_random_single_fpreg();
    }
}

sub write_random_register_data()
{
    # TODO hardcoded, also no d16-d31 initialisation
    my $vfp = 2;  # 0 : no vfp, 1 : vfpd16, 2 : vfpd32
    if ($is_thumb) {
        write_switch_to_arm();
    }
    
    # initialise all registers
    if ($vfp == 1) {
        insn32(0xe28f0008);    # add r0, pc, #8
        insn32(0xecb00b20);    # vldmia r0!, {d0-d15}
    } elsif ($vfp == 2) {
        insn32(0xe28f000c);    # add r0, pc, #12
        insn32(0xecb00b20);    # vldmia r0!, {d0-d15}
        insn32(0xecf00b20);    # vldmia r0!, {d16-d31}
    } else {
        insn32(0xe28f0004);    # add r0, pc, #4
    }
    
    insn32(0xe8905fff);        # ldmia r0, {r0-r12,r14}
    my $datalen = 14;
    $datalen += (32 * $vfp);
    insn32(0xea000000 + ($datalen-1));    # b next
    for (0..(($vfp * 16) - 1)) {
        write_random_fpreg();
    }
    #  .word [14 words of data for r0..r12,r14]
    for (0..13) {
        insn32(rand(0xffffffff));
    }
    # next:
    # clear the flags (NZCVQ and GE): msr APSR_nzcvqg, #0
    insn32(0xe32cf000);
    if ($is_thumb) {
        write_switch_to_thumb();
    }
    write_risuop($OP_COMPARE);
}

sub write_memblock_setup()
{
    # Write code which sets up the memory block for loads and stores.
    # We just need to set r0 to point to a block of at least 8K length
    # of random data.
    if ($is_thumb) {
        write_switch_to_arm();
    }

    insn32(0xe28f0004); # add r0, pc, #4
    write_arm_risuop($OP_SETMEMBLOCK);
    insn32(0xea000000 + 2047); # b next
    for (0..2047) {
        insn32(rand(0xffffffff));
    }
    # next:

    if ($is_thumb) {
        write_switch_to_thumb();
    }
}

sub write_arm_prologue($)
{
    my ($fpscr) = @_;
    # We will start in ARM mode because we're just loaded
    # as binary and jump to the aligned start of it, so
    # the target address LSB is always 0.

    # movw r0, imm16
    insn32(0xe3000000 | ($fpscr & 0xfff) | (($fpscr & 0xf000) << 4));
    # movt r0, imm16
    insn32(0xe3400000 | (($fpscr & 0xf0000000) >> 12) | (($fpscr & 0x0fff0000) >> 16));
    # vmsr fpscr, r0
    insn32(0xeee10a10);

    if ($is_thumb) {
        # This mode change will be immediately followed by one
        # in write_random_register_data() but never mind.
        write_switch_to_thumb();
    }
    write_random_register_data();
}

sub dump_insn_details($$)
{
    # Dump the instruction details for one insn
    my ($insn, $rec) = @_;
    print "insn $insn: ";
    my $insnwidth = $rec->{width};
    my $fixedbits = $rec->{fixedbits};
    my $fixedbitmask = $rec->{fixedbitmask};
    my $constraint = $rec->{blocks}{"constraints"};
    print sprintf(" insnwidth %d fixedbits %08x mask %08x ", $insnwidth, $fixedbits, $fixedbitmask);
    if (defined $constraint) {
        print "constraint $constraint ";
    }
    for my $tuple (@{ $rec->{fields} }) {
        my ($var, $pos, $mask) = @$tuple;
        print "($var, $pos, " . sprintf("%08x", $mask) . ") ";
    }
    print "\n";
}

# Functions used in memory blocks to handle addressing modes.
# These all have the same basic API: they get called with parameters
# corresponding to the interesting fields of the instruction,
# and should generate code to set up the base register to be
# valid. They must return the register number of the base register.
# The last (array) parameter lists the registers which are trashed
# by the instruction (ie which are the targets of the load).
# This is used to avoid problems when the base reg is a load target.
sub reg($@)
{
    my ($base, @trashed) = @_;
    # Get a random offset within the memory block, of the
    # right alignment.
    my $offset = rand(2048) & ~3;
    write_mov_ri(0, $offset);
    write_risuop($OP_GETMEMBLOCK);
    # Now r0 is the address we want to do the access to,
    # so just move it into the basereg
    if ($base != 0) {
        write_mov_rr($base, 0);
        write_mov_ri(0, 0);
    }
    if (grep $_ == $base, @trashed) {
        return -1;
    }
    return $base;
}

sub reg_plus_imm($$@)
{
    # Handle reg + immediate addressing mode
    my ($base, $imm, @trashed) = @_;
    # Get a random offset within the memory block, of the
    # right alignment.
    my $offset = rand(2048) & ~3;
    write_mov_ri(0, $offset);
    write_risuop($OP_GETMEMBLOCK);
    # Now r0 is the address we want to do the access to,
    # so set the basereg by doing the inverse of the
    # addressing mode calculation, ie base = r0 - imm
    # We could do this more cleverly with a sub immediate.
    if ($base != 0) {
        write_mov_ri($base, $imm);
        write_sub_rrr($base, 0, $base);
        # Clear r0 to avoid register compare mismatches
        # when the memory block location differs between machines.
        write_mov_ri(0, 0);
    } else {
        # We borrow r1 as a temporary (not a problem
        # as long as we don't leave anything in a register
        # which depends on the location of the memory block)
        write_mov_ri(1, $imm);
        write_sub_rrr($base, 0, 1);
    }
    if (grep $_ == $base, @trashed) {
        return -1;
    }
    return $base;
}

sub reg_minus_imm($$@)
{
    my ($base, $imm, @trashed) = @_;
    return reg_plus_imm($base, -$imm, @trashed);
}

sub reg_plus_reg_shifted($$$@)
{
    # handle reg + reg LSL imm addressing mode
    my ($base, $idx, $shift, @trashed) = @_;
    die "reg_plus_reg_shifted: bad shift size\n" if ($shift < 0 || $shift > 3);
    my $savedidx = 0;
    if ($idx == 0) {
        # save the index into some other register for the
        # moment, because the risuop will trash r0
        $idx = 1;
        $idx++ if $idx == $base;
        $savedidx = 1;
        write_mov_rr($idx, 0);
    }

    # Get a random offset within the memory block, of the
    # right alignment.
    my $offset = rand(2048) & ~3;
    write_mov_ri(0, $offset);
    write_risuop($OP_GETMEMBLOCK);
    # Now r0 is the address we want to do the access to,
    # so set the basereg by doing the inverse of the
    # addressing mode calculation, ie base = r0 - idx LSL imm
    # LSL x is shift type 0, 
    write_sub_rrrs($base, 0, $idx, $SHIFT_LSL, $shift);
    if ($savedidx) {
        # We can move this back to r0 now
        write_mov_rr(0, $idx);
    } elsif ($base != 0) {
        write_mov_ri(0, 0);
    }
    if (grep $_ == $base, @trashed) {
        return -1;
    }
    return $base;
}

sub reg_plus_reg($$@)
{
    my ($base, $idx, @trashed) = @_;
    return reg_plus_reg_shifted($base, $idx, 0, @trashed);
}

sub eval_with_fields($$$$) {
    # Evaluate the given block in an environment with Perl variables
    # set corresponding to the variable fields for the insn.
    # Return the result of the eval; we die with a useful error
    # message in case of syntax error.
    my ($insn, $rec, $blockname, $block) = @_;
    my $evalstr = "{ ";
    for my $tuple (@{ $rec->{fields} }) {
        my ($var, $pos, $mask) = @$tuple;
        my $val = ($insn >> $pos) & $mask;
        $evalstr .= "my (\$$var) = $val; ";
    }
    $evalstr .= $block;
    $evalstr .= "}";
    my $v = eval $evalstr;
    if ($@) {
        print "Syntax error detected evaluating $blockname string:\n$block\n";
        exit(1);
    }
    return $v;
}

sub gen_one_insn($$)
{
    # Given an instruction-details array, generate an instruction
    my $constraintfailures = 0;

    INSN: while(1) {
        my ($forcecond, $rec) = @_;
        my $insn = int(rand(0xffffffff));
        my $insnwidth = $rec->{width};
        my $fixedbits = $rec->{fixedbits};
        my $fixedbitmask = $rec->{fixedbitmask};
        my $constraint = $rec->{blocks}{"constraints"};
        my $memblock = $rec->{blocks}{"memory"};

        $insn &= ~$fixedbitmask;
        $insn |= $fixedbits;
        for my $tuple (@{ $rec->{fields} }) {
            my ($var, $pos, $mask) = @$tuple;
            my $val = ($insn >> $pos) & $mask;
            # Check constraints here:
            # not allowed to use or modify sp or pc
            next INSN if ($var =~ /^r/ && (($val == 13) || ($val == 15)));
            # Some very arm-specific code to force the condition field
            # to 'always' if requested.
            if ($forcecond) {
                if ($var eq "cond") {
                    $insn &= ~ ($mask << $pos);
                    $insn |= (0xe << $pos);
                }
            }
        }
        if (defined $constraint) {
            # user-specified constraint: evaluate in an environment
            # with variables set corresponding to the variable fields.
            my $v = eval_with_fields($insn, $rec, "constraints", $constraint);
            if (!$v) {
                $constraintfailures++;
                if ($constraintfailures > 10000) {
                    print "10000 consecutive constraint failures for constraints string:\n$constraint\n";
                    exit (1);
                }
                next INSN;
            }
        }

        # OK, we got a good one
        $constraintfailures = 0;

        my $basereg;

        if (defined $memblock) {
            # This is a load or store. We simply evaluate the block,
            # which is expected to be a call to a function which emits
            # the code to set up the base register and returns the
            # number of the base register.
            $basereg = eval_with_fields($insn, $rec, "memory", $memblock);
        }

        if ($is_thumb) {
            # Since the encoding diagrams in the ARM ARM give 32 bit
            # Thumb instructions as low half | high half, we
            # flip the halves here so that the input format in
            # the config file can be in the same order as the ARM.
            # For a 16 bit Thumb instruction the generated insn is in
            # the high halfword (because we didn't bother to readjust
            # all the bit positions in parse_config_file() when we
            # got to the end and found we only had 16 bits).
            insn16($insn >> 16);
            if ($insnwidth == 32) {
                insn16($insn & 0xffff);
            }
        } else {
            # ARM is simple, always a 32 bit word
            insn32($insn);
        }

        if (defined $memblock) {
            # Clean up following a memory access instruction:
            # we need to turn the (possibly written-back) basereg
            # into an offset from the base of the memory block,
            # to avoid making register values depend on memory layout.
            # $basereg -1 means the basereg was a target of a load
            # (and so it doesn't contain a memory address after the op)
            if ($basereg != -1) {
                write_mov_ri(0, 0);
                write_risuop($OP_GETMEMBLOCK);
                write_sub_rrr($basereg, $basereg, 0);
                write_mov_ri(0, 0);
            }
            write_risuop($OP_COMPAREMEM);
        }
        return;
    }
}

my $lastprog;
my $proglen;
my $progmax;

sub progress_start($$)
{
    ($proglen, $progmax) = @_;
    $proglen -= 2; # allow for [] chars
    $| = 1;        # disable buffering so we can see the meter...
    print "[" . " " x $proglen . "]\r";
    $lastprog = 0;
}

sub progress_update($)
{
    # update the progress bar with current progress
    my ($done) = @_;
    my $barlen = int($proglen * $done / $progmax);
    if ($barlen != $lastprog) {
        $lastprog = $barlen;
        print "[" . "-" x $barlen . " " x ($proglen - $barlen) . "]\r";
    }
}

sub progress_end()
{
    print "[" . "-" x $proglen . "]\n";
    $| = 0;
}

sub write_test_code($$)
{
    my ($condprob, $numinsns) = @_;
    # convert from probability that insn will be conditional to
    # probability of forcing insn to unconditional
    $condprob = 1 - $condprob;

    # TODO better random number generator?
    srand(0);

    # Get a list of the insn keys which are permitted by the re patterns
    my @keys = keys %insn_details;
    if (@pattern_re) {
        my $re = '\b((' . join(')|(',@pattern_re) . '))\b';
        @keys = grep /$re/, @keys;
    }
    if (!@keys) {
        print STDERR "No instruction patterns available! (bad config file or --pattern argument?)\n";
        exit(1);
    }
    print "Generating code using patterns: @keys...\n";
    progress_start(78, $numinsns);

    if (grep { defined($insn_details{$_}->{blocks}->{"memory"}) } @keys) {
        write_memblock_setup();
    }

    for my $i (1..$numinsns) {
        my $insn_enc = $keys[int rand (@keys)];
        #dump_insn_details($insn_enc, $insn_details{$insn_enc});
        my $forcecond = (rand() < $condprob) ? 1 : 0;
        gen_one_insn($forcecond, $insn_details{$insn_enc});
        write_risuop($OP_COMPARE);
        # Rewrite the registers periodically. This avoids the tendency
        # for the VFP registers to decay to NaNs and zeroes.
        if (($i % 100) == 0) {
            write_random_register_data();
        }
        progress_update($i);
    }
    progress_end();
}

sub parse_risu_directive($$@)
{
    # Parse a line beginning with ".", which is a directive used
    # to affect how risu/risugen should behave rather than an insn pattern.

    # At the moment we only support one directive:
    #  .mode modename
    # where modename can be "arm" or "thumb"
    my ($file, $seen_pattern, $dirname, @rest) = @_;
    if ($dirname eq ".mode") {
        if ($seen_pattern != 0) {
            print STDERR "$file:$.: .mode directive must precede all instruction patterns\n";
            exit(1);
        }
        if ($#rest != 0) {
            print STDERR "$file:$.: wrong number of arguments to .mode\n";
            exit(1);
        }
        if ($rest[0] eq "thumb") {
            $is_thumb = 1;
        } elsif ($rest[0] eq "arm") {
            $is_thumb = 0;
        } else {
            print STDERR "$file:$.: .mode: unknown mode $rest[0]\n";
            exit(1);
        }
    } else {
        print STDERR "$file:$.: unknown directive $dirname\n";
        exit(1);
    }
}

sub read_tokenised_line(*)
{
    # Read a tokenised line from the config file.
    # For our purposes, tokens are generally whitespace
    # separated, but any token beginning with a '{'
    # continues until we have encountered the matching '}'
    # (including counting in and out any nested {} within it).
    # This is also where we deal with blank lines, comments
    # and line continuation characters.
    # Any mismatched braces will manifest as a single '{'
    # or '}' token in the output.
    my ($fh) = @_;
    my $line = '';
    while (<$fh>) {
        chomp;
        $line .= $_;
        next if $line =~ s/\\$//;
        $line =~ s/#.*$//;
        next if $line =~ /^\s*$/;
        last;
    }
    #print "got final line:\n";
    #print "$line\n";

    my (@tokens) = extract_multiple($line,
                                    [ sub { extract_bracketed($_[0],'{}') },
                                      qr/([^{} ]+)/,
                                      qr/([{}]+)/,
                                    ], undef, 1);

    #print "Tokenised as:\n";
    #print Dumper(@tokens), "\n";
    return @tokens;
}

sub parse_config_file($)
{
    # Read in the config file defining the instructions we can generate
    my ($file) = @_;
    # See the README for details of the format we are parsing here.

    # Our data structure here is fairly simple:
    # an assoc array %insn_details whose keys are "insn_enc" strings
    # and whose values are array references. Each array is, in order:
    # insnwidth, fixedbits, fixedbitmask, constraint, var,bitpos,mask , var,bitpos,mask ...

    my ($seen_pattern) = 0;
    my @tokens;
    open(CFILE, $file) or die "can't open $file: $!";
    while (@tokens = read_tokenised_line(CFILE))
    {
        if (grep {/^[\{\}]$/} @tokens) {
            print STDERR "$file:$.: mismatched braces\n";
            exit(1);
        }

        if ($tokens[0] =~ /^\./) {
            parse_risu_directive($file, $seen_pattern, @tokens);
            next;
        }
        $seen_pattern = 1;

        my $insnrec = {};
        my @fields = ();

        my ($insn, $enc, @bits) = @tokens;
        if (!defined $enc) {
            print STDERR "$file:$.: no insn or encoding?\n";
            exit(1);
        }
        if ($insn !~ /^[A-Za-z0-9][A-Za-z0-9_]*$/) {
            print STDERR "$file:$.: invalid insn name $insn ";
            print STDERR "(possibly missing line continuation character?)\n";
            exit(1);
        }
        if ($enc !~ /^[A-Za-z0-9][A-Za-z0-9_]*$/) {
            print STDERR "$file:$.: invalid encoding name $enc\n";
            exit(1);
        }
        my $insnname = "${insn} ${enc}";
        if (exists $insn_details{$insnname}) {
            print STDERR "$file:$.: redefinition of $insnname\n";
            exit(1);
        }

        my $fixedbits = 0;
        my $fixedbitmask = 0;
        my $bitpos = 32;
        my $insnwidth = 32;
        my $seenblock = 0;

        while (@bits) {
            my $bit = shift @bits;
            my $bitlen;
            my $bitval;
            my $var;

            if ($bit =~ /^\!/) {
                # A named block
                my $blockname = $bit;
                $blockname =~ s/^!//;
                my $block = shift @bits;
                if (!defined $block || $block !~ /^{/) {
                    print STDERR "$file:$.: expected block following '!$blockname'\n";
                    exit(1);
                }
                if (!$valid_blockname{$blockname}) {
                    print STDERR "$file:$.: unknown block name '$blockname'\n";
                    exit(1);
                }
                $insnrec->{blocks}{$blockname} = $block;
                $seenblock++;
                next;
            } elsif ($bit =~ /^{/) {
                # An unnamed block is constraints, for backcompatibility
                $insnrec->{blocks}{"constraints"} = $bit;
                $seenblock++;
                next;
            } elsif ($bit =~ /^[01]*$/) {
                # fixed bits
                $bitlen = length($bit);
                $bitval = oct("0b".$bit);
            } elsif ($bit =~ /^([a-zA-Z][a-zA-Z0-9]*):([0-9]+)$/) {
                # variable field
                $var = $1;
                $bitlen = $2;
            } elsif($bit =~ /^([a-zA-Z][a-zA-Z0-9]*)$/) {
                # single bit variable field
                $var = $1;
                $bitlen = 1;
            } else {
                print STDERR "$file:$.: ($insn $enc) unrecognised bitfield specifier $bit\n";
                exit(1);
            }

            if ($seenblock) {
                print STDERR "$file:$.: blocks may not occur in the middle of a pattern\n";
                exit(1);
            }

            my $bitmask = oct("0b". '1' x $bitlen);
            $bitpos -= $bitlen;
            if ($bitpos < 0) {
                print STDERR "$file:$.: ($insn $enc) too many bits specified\n";
                exit(1);
            }

            if (defined $bitval) {
                $fixedbits |= ($bitval << $bitpos);
                $fixedbitmask |= ($bitmask << $bitpos);
            } else {
                push @fields, [ $var, $bitpos, $bitmask ];
            }
        }
        if ($bitpos == 16) {
            # assume this is a half-width thumb instruction
            # Note that we don't fiddle with the bitmasks or positions,
            # which means the generated insn will be in the high halfword!
            $insnwidth = 16;
        } elsif ($bitpos != 0) {
            print STDERR "$file:$.: ($insn $enc) not enough bits specified\n";
            exit(1);
        }
        if ((($fixedbits & $fixedbitmask) != $fixedbits)
            || (($fixedbits & ~$fixedbitmask) != 0)) {
            die "internal error: fixed bits not lined up with mask";
        }
        #  Stick the fixedbit info on the front of the array now we know it
        $insnrec->{name} = $insnname;
        $insnrec->{width} = $insnwidth;
        $insnrec->{fixedbits} = $fixedbits;
        $insnrec->{fixedbitmask} = $fixedbitmask;
        $insnrec->{fields} = [ @fields ];
        $insn_details{$insnname} = $insnrec;
    }
    close(CFILE) or die "can't close $file: $!";
}

sub usage()
{
    print <<EOT;
Usage: risugen [options] inputfile outputfile

where inputfile is a configuration file specifying instruction patterns
and outputfile is the generated raw binary file.

Valid options:
    --numinsns n : generate n instructions (default is 10000)
    --fpscr n    : set initial FPSCR value (default is 0)
    --condprob p : make instructions conditional with probability p
                   (default is 0, ie all instructions are always executed)
    --pattern re[,re...] : only use instructions matching regular expression
                   Each re must match a full word (that is, we match on
                   the perl regex '\\b((re)|(re))\\b'). This means that
                   'VMULL' will match 'VMULL A1' and 'VMULL A2' but not
                   'VMULL_scalar A1'. This is generally what you wanted.
    --help       : print this message
EOT
}

sub main()
{
    my $numinsns = 10000;
    my $condprob = 0;
    my $fpscr = 0;
    my ($infile, $outfile);

    GetOptions( "help" => sub { usage(); exit(0); },
                "numinsns=i" => \$numinsns,
                "fpscr=o" => \$fpscr,
                "pattern=s" => \@pattern_re,
                "condprob=f" => sub { 
                    $condprob = $_[1];
                    if ($condprob < 0.0 || $condprob > 1.0) {
                        die "Value \"$condprob\" invalid for option condprob (must be between 0 and 1)\n";
                    }
                },
        ) or return 1;
    # allow "--pattern re,re" and "--pattern re --pattern re"
    @pattern_re = split(/,/,join(',',@pattern_re));

    if ($#ARGV != 1) {
        usage();
        return 1;
    }

    $infile = $ARGV[0];
    $outfile = $ARGV[1];

    parse_config_file($infile);
    
    open_bin($outfile);
    write_arm_prologue($fpscr);
    write_test_code($condprob, $numinsns);
    write_risuop($OP_TESTEND);
    close_bin();
    return 0;
}

exit(main);