Diffstat (limited to 'iolatsimu.c')
-rw-r--r--  iolatsimu.c  345
1 file changed, 345 insertions, 0 deletions
diff --git a/iolatsimu.c b/iolatsimu.c
new file mode 100644
index 0000000..2d8badc
--- /dev/null
+++ b/iolatsimu.c
@@ -0,0 +1,345 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <fcntl.h>
+
+#include "list.h"
+
+/*
+ * This represents the resolution of the statistics in usec; the latency
+ * for a bucket is BUCKET_INTERVAL * index.
+ * The higher the resolution, the less accurate the prediction will be.
+ * Some measurements:
+ *
+ * For 1ms:
+ * SSD 6Gb/s : 99.7%
+ * SD card class 10: 97.7%
+ * SD card class 4 : 54.3%
+ * HDD on USB : 93.6%
+ *
+ * For 500us:
+ * SSD 6Gb/s : 99.9%
+ * SD card class 10 : 96.8%
+ * SD card class 4 : 55.8%
+ * HDD on USB : 86.3%
+ *
+ * For 200us:
+ * SSD 6Gb/s : 99.7%
+ * SD card class 10 : 95.5%
+ * SD card class 4 : 29.5%
+ * HDD on USB : 66.3%
+ *
+ * For 100us:
+ * SSD 6Gb/s : 85.7%
+ * SD card class 10 : 67.63%
+ * SD card class 4 : 31.4%
+ * HDD on USB : 44.97%
+ *
+ * Aiming for 100% is not necessarily good because we want to hit the correct
+ * idle state. Setting a low resolution will group the different latencies
+ * into a big interval which may overlap with the cpuidle state target
+ * residency.
+ *
+ */
+#define BUCKET_INTERVAL 500
+
+/*
+ * Number of successive hits for the same bucket. That is the threshold
+ * triggering the move of the element to the beginning of the list, so
+ * it becomes more heavily weighted in the statistics when guessing the next
+ * latency.
+ */
+#define BUCKET_SUCCESSIVE 5
+
+/*
+ * For debugging purposes
+ */
+static int success = 0;
+static int nrlatency = 0;
+
+/*
+ * What is a bucket ?
+ *
+ * A bucket is an interval of latency. This interval is defined with the
+ * BUCKET_INTERVAL. The bucket index tells which latency interval we are in.
+ * For example, with an index of 2 and a bucket interval of 1000 usec,
+ * the bucket contains the latencies between 2000 and 2999 usec.
+ *
+ */
+struct bucket {
+ int hits;
+ int successive_hits;
+ int index;
+ struct list_head list;
+};
+
+static LIST_HEAD(bucket_list);
+
+/*
+ * Find a bucket associated with the specified index
+ */
+static struct bucket *bucket_find(int index)
+{
+ struct list_head *list;
+ struct bucket *bucket = NULL;
+
+ list_for_each(list, &bucket_list) {
+
+ bucket = list_entry(list, struct bucket, list);
+
+ if (bucket->index == index)
+ return bucket;
+ }
+
+ return NULL;
+}
+
+/*
+ * Allocate a bucket
+ */
+static struct bucket *bucket_alloc(int index)
+{
+ struct bucket *bucket;
+
+ bucket = malloc(sizeof(*bucket));
+ if (bucket) {
+ bucket->hits = 0;
+ bucket->successive_hits = 0;
+ bucket->index = index;
+ INIT_LIST_HEAD(&bucket->list);
+ }
+
+ return bucket;
+}
+
+/*
+ * The list is ordered by history. The first element is the one with
+ * the most *successive* hits. This function is called each time a new
+ * latency is inserted. In the kernel this will replace the io latency
+ * avg, which is pretty simple and inaccurate.
+ *
+ * The algorithm is pretty simple here: as the first element is the
+ * one with the most chances to occur next, its weight is the biggest,
+ * the second one has less weight, etc ...
+ *
+ * The bucket which has the maximum score (number of hits weighted by
+ * its position in the list) is the bucket which is most likely to
+ * occur next.
+ *
+ */
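+
+/*
+ * Illustrative example (hypothetical hit counts, not real measurements):
+ * if the list holds, in order, buckets with 8, 20 and 4 hits, the scores
+ * computed below are 8 / 1 = 8, 20 / 2 = 10 and 4 / 4 = 1, so the index
+ * of the second bucket is returned: history gives the head an advantage,
+ * but a bucket with many more hits can still win.
+ */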
+static int bucket_guessed_index(void)
+{
+ int weight = 0;
+ int score, score_max = 0, winner = 0;
+ struct bucket *bucket;
+ struct list_head *list;
+
+ if (list_empty(&bucket_list))
+ return -1;
+
+ list_for_each(list, &bucket_list) {
+
+ bucket = list_entry(list, struct bucket, list);
+
+ /*
+ * The list is ordered by history: the first element has
+ * more weight than the next one. A bucket which is being
+ * hit several times in a row has already been moved to the
+ * head of the list by bucket_fill(), so it is naturally
+ * taken into account through its position.
+ */
+ score = bucket->hits / (1 << weight);
+ weight++;
+
+ if (score < score_max)
+ continue;
+
+ score_max = score;
+ winner = bucket->index;
+ }
+
+ return winner;
+}
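+
+/*
+ * Note: the example above the function uses made-up hit counts; the
+ * guessed index depends only on the hit counts and on the position of
+ * each bucket in the history-ordered list.
+ */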
+
+/*
+ * Return the bucket index for the specified latency
+ */
+static int bucket_index(int latency)
+{
+ return latency / BUCKET_INTERVAL;
+}
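+
+/*
+ * For instance, with BUCKET_INTERVAL set to 500, a latency of 1234 usec
+ * (an arbitrary example value) gives bucket_index(1234) == 2, i.e. the
+ * bucket covering the [1000, 1499] usec interval.
+ */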
+
+/*
+ * The dynamic of the list is the following:
+ * - Each new element is inserted at the end of the list
+ * - Each element hit <BUCKET_SUCCESSIVE> times in this function
+ * is elected to be moved to the beginning of the list
+ */
+static int bucket_fill(int latency)
+{
+ int index = bucket_index(latency);
+ struct bucket *bucket;
+
+ /*
+ * For debugging purposes
+ */
+ nrlatency++;
+ if (bucket_guessed_index() == index)
+ success++;
+
+ /*
+ * Find the bucket associated with the index
+ */
+ bucket = bucket_find(index);
+ if (!bucket) {
+ bucket = bucket_alloc(index);
+ if (!bucket)
+ return -1;
+
+ list_add_tail(&bucket->list, &bucket_list);
+ }
+
+ /*
+ * Increase the number of times this bucket has been hit
+ */
+ bucket->hits++;
+ bucket->successive_hits++;
+
+ /*
+ * We reached BUCKET_SUCCESSIVE hits on the same bucket,
+ * move it to the beginning of the list
+ */
+ if (bucket->successive_hits == BUCKET_SUCCESSIVE) {
+ list_move(&bucket->list, &bucket_list);
+ bucket->successive_hits = 1;
+ }
+
+ return 0;
+}
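+
+/*
+ * Example of the list dynamic (hypothetical sequence): a bucket seen for
+ * the first time is appended at the tail of the list; once bucket_fill()
+ * has counted BUCKET_SUCCESSIVE hits for it, list_move() puts it at the
+ * head and its successive_hits counter restarts at 1, so it then gets
+ * the biggest weight in bucket_guessed_index().
+ */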
+
+/*
+ * For debugging purposes
+ */
+static void bucket_show(void)
+{
+ struct list_head *list;
+ struct bucket *bucket;
+
+ list_for_each(list, &bucket_list) {
+
+ bucket = list_entry(list, struct bucket, list);
+
+ printf("bucket %d: %d\n",
+ bucket->index, bucket->hits);
+ }
+
+ printf("Number of correct predictions: %d/%d (%.2f%%)\n",
+ success, nrlatency, ((float)success / (float)nrlatency) * 100.0);
+}
+
+/*
+ * IO latency simulation main function.
+ *
+ * Writes a buffer at different offsets and reads it back
+ * at random offsets. posix_fadvise() is used to prevent the OS
+ * from keeping the data in the page cache, so that we measure
+ * real access times.
+ */
+
+#define NROFFSET 256
+#define PAGESIZE 16384
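+
+/*
+ * With these values the test file written below is NROFFSET * PAGESIZE =
+ * 256 * 16384 bytes = 4 MiB, and each measured read transfers one 16 KiB
+ * buffer.
+ */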
+
+static char buffer[PAGESIZE];
+
+/*
+ * Write the big file of size NROFFSET * PAGESIZE
+ */
+void write_file(int fd)
+{
+ int i = 0;
+
+ for (i = 0; i < NROFFSET; i++)
+ write(fd, buffer, sizeof(buffer));
+
+ fsync(fd);
+}
+
+int mktempfile(const char *dirname)
+{
+ char *name;
+ int fd;
+
+ if (asprintf(&name, "%s/XXXXXX", dirname) < 0) {
+ perror("asprintf");
+ return -1;
+ }
+
+ /*
+ * Unlink right away so nothing is left behind on the file
+ * system if the program crashes; the data stays reachable
+ * through the returned descriptor
+ */
+ fd = mkstemp(name);
+ if (fd >= 0)
+ unlink(name);
+ free(name);
+
+ return fd;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timeval begin, end;
+ off_t offset;
+ int i;
+ unsigned long int latency;
+ const char *dirname = "/tmp";
+ int fd;
+
+ /*
+ * Optionally we can specify the directory where to test the
+ * latencies (defaults to /tmp)
+ */
+ if (argc == 2)
+ dirname = argv[1];
+
+ /*
+ * Make temporary file, we don't want to pollute the file system
+ * with big files if this program crashes
+ */
+ fd = mktempfile(dirname);
+ if (fd < 0) {
+ perror("mktempfile");
+ return -1;
+ }
+
+ write_file(fd);
+
+ /*
+ * Initialize the random seed with the current usec count.
+ * The random values will be used to access the file at random
+ * offsets, avoiding sequential accesses which could be optimized
+ * by the hardware
+ */
+ gettimeofday(&begin, NULL);
+ srandom(begin.tv_usec);
+
+ for (i = 0; i < 10000; i++) {
+
+ /*
+ * Compute the offset address to read from
+ */
+ offset = (random() % NROFFSET) * PAGESIZE;
+
+ /*
+ * Ask the kernel to drop cached pages for this range
+ * (POSIX_FADV_DONTNEED) so the next read really hits the
+ * storage; see man posix_fadvise
+ */
+ posix_fadvise(fd, offset, PAGESIZE, POSIX_FADV_DONTNEED);
+
+ /*
+ * Measure the time to read a PAGESIZE buffer
+ */
+ gettimeofday(&begin, NULL);
+ pread(fd, buffer, PAGESIZE, offset);
+ gettimeofday(&end, NULL);
+ latency = ((end.tv_sec - begin.tv_sec) * 1000000) +
+ (end.tv_usec - begin.tv_usec);
+
+ /*
+ * Fill a bucket with this latency
+ */
+ bucket_fill(latency);
+ }
+
+ bucket_show();
+
+ close(fd);
+
+ return 0;
+}