#define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include "list.h" /* * That represents the resolution of the statistics in usec, the latency * for a bucket is BUCKET_INTERVAL * index. * The higher the resolution is the lesser good prediction you will have. * Some measurements: * * For 1ms: * SSD 6Gb/s : 99.7% * SD card class 10: 97.7% * SD card class 4 : 54.3% * HDD on USB : 93.6% * * For 500us: * SSD 6Gb/s : 99.9% * SD card class 10 : 96.8% * SD card class 4 : 55.8% * HDD on USB : 86.3% * * For 200us: * SSD 6Gb/s : 99.7% * SD card class 10 : 95.5% * SD card class 4 : 29.5% * HDD on USB : 66.3% * * For 100us: * SSD 6Gb/s : 85.7% * SD card class 10 : 67.63% * SD card class 4 : 31.4% * HDD on USB : 44.97% * * Aiming a 100% is not necessary good because we want to hit the correct * idle state. Setting a low resolution will group the different latencies * into a big interval which may overlap with the cpuidle state target * residency. * */ /* * For debugging purpose */ static int success = 0; static int nrlatency = 0; /* * What is a bucket ? * * A bucket is an interval of latency. This interval is defined with the * BUCKET_INTERVAL. The bucket index gives what latency interval we have. * For example, if you have an index 2 and a bucket interval of 1000usec, * then the bucket contains the latencies 2000 and 2999 usec. * */ struct bucket { int hits; int successive_hits; int avg_successive_hits; int index; struct list_head list; }; static LIST_HEAD(bucket_list); static struct bucket *last_bucket; struct options { int pagesize; const char *path; int nrprocesses; int bucketsize; }; /* * Find a bucket associated with the specified index */ static struct bucket *bucket_find(int index) { struct list_head *list; struct bucket *bucket = NULL; list_for_each(list, &bucket_list) { bucket = list_entry(list, struct bucket, list); if (bucket->index == index) return bucket; } return NULL; } /* * Allocate a bucket */ static struct bucket *bucket_alloc(int index) { struct bucket *bucket; bucket = malloc(sizeof(*bucket)); if (bucket) { bucket->hits = 0; bucket->successive_hits = 0; bucket->avg_successive_hits = 0; bucket->index = index; INIT_LIST_HEAD(&bucket->list); } return bucket; } /* * The list is ordered by history. The first element is the one with * the more *successive* hits. This function is called each time a new * latency is inserted. In the kernel that will replace the io latency * avg, which is pretty simple and inaccurate. * * The algorithm is pretty simple here: As the first element is the * one which more chance to occur next, its weight is the bigger, the * second one has less weight, etc ... * * The bucket which has the maximum score (number of hits weighted by * its position in the list) is the next bucket which has more chances * to occur. * */ static int bucket_guessed_index(void) { int weight = 0; int score, score_max = 0, winner = 0; struct bucket *bucket; struct list_head *list; if (list_empty(&bucket_list)) return -1; list_for_each(list, &bucket_list) { bucket = list_entry(list, struct bucket, list); /* * The list is ordered by history, the first element has * more weight the next one. If a bucket is in the process * of being hit several times, take it into account. */ score = bucket->hits / ((2 * weight) + 1); weight++; if (score < score_max) continue; score_max = score; winner = bucket->index; } return winner; } /* * Return the bucket index for the specified latency */ static int bucket_index(int latency, int bucket_interval) { return latency / bucket_interval; } #define AVG(A, B, I) ((A) + ((B - A) / (I))) /* * The dynamic of the list is the following. * - Each new element is inserted at the end of the list * - Each element passing times in this function * is elected to be moved at the beginning at the list */ static int bucket_fill(int latency, struct options *options) { int index = bucket_index(latency, options->bucketsize); struct bucket *bucket; /* * For debugging purpose */ nrlatency++; if (bucket_guessed_index() == index) success++; /* * Find the bucket associated with the index */ bucket = bucket_find(index); if (!bucket) { bucket = bucket_alloc(index); if (!bucket) return -1; list_add_tail(&bucket->list, &bucket_list); } if (bucket != last_bucket) { if (last_bucket) { last_bucket->avg_successive_hits = AVG(last_bucket->avg_successive_hits, last_bucket->successive_hits, last_bucket->hits + 1); last_bucket->successive_hits = 0; } last_bucket = bucket; } /* * Increase the number of times this bucket has been hit */ bucket->hits++; bucket->successive_hits++; /* * We had a successive number of the same bucket, move it at * the beginning of the list */ if (bucket->successive_hits >= bucket->avg_successive_hits) list_move(&bucket->list, &bucket_list); return 0; } /* * For debugging purpose */ static void bucket_show(void) { struct list_head *list; struct bucket *bucket; list_for_each(list, &bucket_list) { bucket = list_entry(list, struct bucket, list); printf("[pid %d] bucket %d: %d\n", getpid(), bucket->index, bucket->hits); } printf("[pid %d] Number of correct predictions: %d/%d (%.2f%%)\n", getpid(), success, nrlatency, ((float)success / (float)nrlatency) * 100.0); } /* * IO latency simulation main function. * * Writes a buffer at different offset and reads it again * at random offset. Use fadvise to prevent OS optimization * keeping the data in cache, so having real access time. */ #define NROFFSET 256 static char *buffer; /* * Write the big file with size NROFFSET * pagesize */ void write_file(int fd, size_t size) { int i = 0; for (i = 0; i < NROFFSET; i++) if (write(fd, buffer, size) < 0) perror("write"); fsync(fd); } int mktempfile(const char *path, size_t size) { char *name; struct stat s; int fd; fd = stat(path, &s); if (fd < 0) { if (errno == ENOENT) { fd = open(path, O_CREAT | O_SYNC | O_RDWR, 0600); if (fd >= 0) { unlink(path); write_file(fd, size); } } goto out; } if (S_ISDIR(s.st_mode)) { if (asprintf(&name, "%s/iosimul-XXXXXX", path) < 0) { perror("asprintf"); return -1; } fd = mkostemp(name, O_SYNC | O_RDWR); if (fd >= 0) { unlink(name); write_file(fd, size); } goto out; } if (S_ISREG(s.st_mode)) { fd = open(path, O_RDWR | O_SYNC); if (fd >= 0) write_file(fd, size); } out: return fd; } int access_file(int fd, struct options *options) { struct timeval begin, end; off_t offset; int i, ret; unsigned long int latency; /* * Initialize the random seed with the number of * current usec. This random value will be used to * access the file randomly, no sequential accesses * which can be optimized by the hardware */ gettimeofday(&begin, NULL); srandom(begin.tv_usec); for (i = 0; i < 10000; i++) { /* * Compute the offset address to read from */ offset = (random() % NROFFSET) * options->pagesize; /* * man posix_fadvise */ posix_fadvise(fd, offset, options->pagesize, POSIX_FADV_DONTNEED); /* * Measure the time to read a options->pagesize buffer */ gettimeofday(&begin, NULL); if (random() % 2) ret = pread(fd, buffer, options->pagesize, offset); else ret = pwrite(fd, buffer, options->pagesize, offset); gettimeofday(&end, NULL); if (ret <= 0) perror("pread/pwrite"); latency = ((end.tv_sec - begin.tv_sec) * 1000000) + ( end.tv_usec - begin.tv_usec); /* * Fill a bucket with this latency */ bucket_fill(latency, options); } return 0; } static int getoptions(int argc, char *argv[], struct options *options) { struct option long_options[] = { { "pagesize", required_argument, NULL, 'p' }, { "path", required_argument, NULL, 'f' }, { "nrprocesses", required_argument, NULL, 't' }, { "bucketsize", required_argument, NULL, 'b' }, { 0, 0, 0, 0 } }; int c; memset(options, 0, sizeof(*options)); while (1) { int optindex = 0; c = getopt_long(argc, argv, "p:f:t:b:", long_options, &optindex); if (c == -1) break; switch (c) { case 'p': options->pagesize = atoi(optarg); break; case 'f': options->path = optarg; break; case 't': options->nrprocesses = atoi(optarg); break; case 'b': options->bucketsize = atoi(optarg); break; default: fprintf(stderr, "%s: Unknown option `-%c'.\n", basename(argv[0]), optopt); return -1; } } if (!options->path) options->path = "/tmp"; if (!options->pagesize) options->pagesize = 4096; if (!options->bucketsize) options->bucketsize = 200; if (!options->nrprocesses) options->nrprocesses = 1; return optind; } int main(int argc, char *argv[]) { struct options options; int fd, pid; int i; if (getoptions(argc, argv, &options) <= 0) return -1; buffer = malloc(sizeof(*buffer) * options.pagesize); if (!buffer) { fprintf(stderr, "failed to allocate buffer\n"); return -1; } /* * Make temporary file, we don't want to pollute the file system * with big files if this program crashes */ fd = mktempfile(options.path, options.pagesize); if (fd < 0) { perror("mktempfile"); return -1; } for (i = 0; i < options.nrprocesses; i++) { pid = fork(); if (pid < 0) { perror("fork"); return -1; } if (pid) continue; access_file(fd, &options); bucket_show(); close(fd); return 0; } while (wait(NULL) == 0); return 0; }