#!/usr/bin/env python """Simple harness that benchmarks different variants of the routines, caches the results, and emits all of the records at the end. Results are generated for different values of: * Source * Routine * Length * Alignment """ import argparse import subprocess import math import sys SINGLE_BUFFER_FUNCTIONS = ['strchr', 'memset', 'strlen', 'memchr'] DUAL_BUFFER_FUNCTIONS = ['memcmp', 'memcpy', 'strcmp', 'strcpy'] FUNCTIONS = list(SINGLE_BUFFER_FUNCTIONS) FUNCTIONS.extend(DUAL_BUFFER_FUNCTIONS) HAS = { 'this': 'bounce memchr memcpy memset strchr strcpy strlen', 'bionic-a9': 'memcmp memcpy memset strcmp strcpy strlen', 'bionic-a15': 'memcmp memcpy memset strcmp strcpy strlen', 'bionic-c': FUNCTIONS, 'csl': 'memcpy memset', 'glibc': 'memcpy memset strchr strlen', 'glibc-c': FUNCTIONS, 'newlib': 'memcpy strcmp strcpy strlen', 'newlib-c': FUNCTIONS, 'newlib-xscale': 'memchr memcpy memset strchr strcmp strcpy strlen', 'plain': 'memset memcpy strcmp strcpy', } ALIGNMENTS = { 'bounce': ['1'], } VARIANTS = sorted(HAS.keys()) NUM_RUNS = 5 DRY_RUN = False #CLI helpers def parse_alignments(alignment): e = Exception("Alignments must be expressed as colon-separated digits e.g. 8:32 16:16") alignments = alignment.split(':') if len(alignments) != 2: raise e try: [int(x) for x in alignments] except: raise e return alignment def run(cache, variant, function, bytes, loops, alignment, run_id, quiet=False): """Perform a single run, exercising the cache as appropriate.""" key = ':'.join('%s' % x for x in (variant, function, bytes, loops, alignment, run_id)) if key in cache: got = cache[key] else: xbuild = build + "/try-" cmd = '%(xbuild)s%(variant)s -t %(function)s -c %(bytes)s -l %(loops)s -a %(alignment)s -r %(run_id)s' % locals() if(DRY_RUN): print cmd return 1 else: try: got = subprocess.check_output(cmd.split()).strip() except OSError, ex: assert False, 'Error %s while running %s' % (ex, cmd) parts = got.split(':') took = float(parts[7]) cache[key] = got if not quiet: print got sys.stdout.flush() return took def run_many(cache, variants, bytes, all_functions): # We want the data to come out in a useful order. So fix an # alignment and function, and do all sizes for a variant first bytes = sorted(bytes) mid = bytes[int(len(bytes)/1.5)] if not all_functions: # Use the ordering in 'this' as the default all_functions = HAS['this'].split() # Find all other functions for functions in HAS.values(): for function in functions.split(): if function not in all_functions: all_functions.append(function) for function in all_functions: for alignment in ALIGNMENTS[function]: for variant in variants: if function not in HAS[variant].split(): continue # Run a tracer through and see how long it takes and # adjust the number of loops based on that. Not great # for memchr() and similar which are O(n), but it will # do f = 50000000 want = 5.0 loops = int(f / math.sqrt(max(1, mid))) took = run(cache, variant, function, mid, loops, alignment, 0, quiet=True) # Keep it reasonable for silly routines like bounce factor = min(20, max(0.05, want/took)) f = f * factor # Round f to a few significant figures scale = 10**int(math.log10(f) - 1) f = scale*int(f/scale) for b in sorted(bytes): # Figure out the number of loops to give a roughly consistent run loops = int(f / math.sqrt(max(1, b))) for run_id in range(0, NUM_RUNS): run(cache, variant, function, b, loops, alignment, run_id) def run_top(cache): parser = argparse.ArgumentParser() #Syntax: python ../cortex-strings/scripts/bench.py -f bounce memcpy -v this glibc parser.add_argument("-v", "--variants", nargs="+", help="library variant to run (run all if not specified)", default = VARIANTS, choices = VARIANTS) parser.add_argument("-f", "--functions", nargs="+", help="function to run (run all if not specified)", default = FUNCTIONS, choices = FUNCTIONS) parser.add_argument("-u", "--upper", type=int, help="upper limit to test to (in bytes)", default = 512*1024) parser.add_argument("-l", "--lower", type=int, help="lowest block size to test (bytes)", default = 0) parser.add_argument("-s", "--steps", nargs="+", help="steps to test powers of", default = ['1.4', '2.0']) parser.add_argument("-p", "--prefix", help="path to executables, relative to CWD", default=".") parser.add_argument("-d", "--dry-run", help="Dry run: just print the invocations that we would use", default=False, action="store_true") parser.add_argument("-a", "--alignments", nargs="+", type=parse_alignments, help="Alignments, e.g. 2:32 for 2-byte-aligned source to 4-byte-aligned dest. Functions with just a dest use the number before the colon.", default=['1:32', '2:32', '4:32', '8:32', '16:32', '32:32']) parser.add_argument("-r", "--runs", type=int, help="Number of runs of each test", default=5) args = parser.parse_args() if(args.lower >= args.upper): raise Exception("Range starts after it ends!") global build, DRY_RUN, ALIGNMENTS, NUM_RUNS NUM_RUNS = args.runs build = args.prefix DRY_RUN = args.dry_run for function in SINGLE_BUFFER_FUNCTIONS: ALIGNMENTS[function] = [x.split(':')[0] for x in args.alignments] for function in DUAL_BUFFER_FUNCTIONS: ALIGNMENTS[function] = args.alignments bytes = [] #Test powers of steps for step in args.steps: if step[0] == '+': step = int(step[1:]) bytes.extend(range(args.lower, args.upper + step, step)) else: step = float(step) steps = int(round(math.log(args.upper - args.lower, step))) bytes.extend([args.lower - 1 + int(step**x) for x in range(steps+1)]) run_many(cache, args.variants, bytes, args.functions) def main(): cachename = 'cache.txt' cache = {} try: with open(cachename) as f: for line in f: line = line.strip() parts = line.split(':') cache[':'.join(parts[:7])] = line except: pass try: run_top(cache) finally: with open(cachename, 'w') as f: for line in sorted(cache.values()): print >> f, line if __name__ == '__main__': main()