From 7ac3eafc7692a85e46388680ff06a90af49cbc0b Mon Sep 17 00:00:00 2001 From: Marlies Ruck Date: Thu, 16 May 2013 14:30:39 -0400 Subject: Staging: Fixes string split across lines in zram Fixes the following checkpatch warning in zram_drv.c: WARNING: quoted string split across lines Signed-off-by: Marlies Ruck Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 596b3dd4c8e172db7806372c9d0347a4e7d28bc5) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index a333d44d0cff..2652dfac0b32 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -302,8 +302,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, handle = zs_malloc(meta->mem_pool, clen); if (!handle) { - pr_info("Error allocating memory for compressed " - "page: %u, size=%zu\n", index, clen); + pr_info("Error allocating memory for compressed page: %u, size=%zu\n", + index, clen); ret = -ENOMEM; goto out; } -- cgit v1.2.3 From f02a1549c748f57f7dadce5d10b4a23796584a72 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:28 +0800 Subject: zram: simplify and optimize dev_to_zram() Simplify and optimize dev_to_zram() without walking the zram_devices array. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 80de574dca050b734d8413a98a983fba3d06240b) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_sysfs.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c index dc76a3dba1b8..e239d9452726 100644 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -30,18 +30,9 @@ static u64 zram_stat64_read(struct zram *zram, u64 *v) return val; } -static struct zram *dev_to_zram(struct device *dev) +static inline struct zram *dev_to_zram(struct device *dev) { - int i; - struct zram *zram = NULL; - - for (i = 0; i < zram_get_num_devices(); i++) { - zram = &zram_devices[i]; - if (disk_to_dev(zram->disk) == dev) - break; - } - - return zram; + return (struct zram *)dev_to_disk(dev)->private_data; } static ssize_t disksize_show(struct device *dev, -- cgit v1.2.3 From 72f7aaa7ded9c9be6d5ec8886ced1238909a1c7a Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:29 +0800 Subject: zram: kill unused zram_get_num_devices() Now there's no caller of zram_get_num_devices(), so kill it. And change zram_devices to static because it's only used in zram_drv.c. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 0f0e3ba346c8d8d2cb409b157df79805931a1c2c) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 7 +------ drivers/staging/zram/zram_drv.h | 2 -- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 2652dfac0b32..49f34b065181 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -37,7 +37,7 @@ /* Globals */ static int zram_major; -struct zram *zram_devices; +static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -679,11 +679,6 @@ static void destroy_device(struct zram *zram) blk_cleanup_queue(zram->queue); } -unsigned int zram_get_num_devices(void) -{ - return num_devices; -} - static int __init zram_init(void) { int ret, dev_id; diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index d542eee81357..b3a315d1eb23 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -110,8 +110,6 @@ struct zram { struct zram_stats stats; }; -extern struct zram *zram_devices; -unsigned int zram_get_num_devices(void); #ifdef CONFIG_SYSFS extern struct attribute_group zram_disk_attr_group; #endif -- cgit v1.2.3 From 728260fce381aa3a2b10a35d7ab727f9761f58bd Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:30 +0800 Subject: zram: optimize memory operations with clear_page()/copy_page() Some architectures provides architecture-specific, optimized version of clear_page()/copy_page(), which may have better performance than memset()/memcpy(). So use clear_page()/copy_page() to optimize zram performance if possible. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 42e99bd975fdd24d2bf1a24ebb8b0b42bab8ba65) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 49f34b065181..18a89863ae53 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -128,23 +128,26 @@ static void zram_free_page(struct zram *zram, size_t index) meta->table[index].size = 0; } +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + static void handle_zero_page(struct bio_vec *bvec) { struct page *page = bvec->bv_page; void *user_mem; user_mem = kmap_atomic(page); - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); kunmap_atomic(user_mem); flush_dcache_page(page); } -static inline int is_partial_io(struct bio_vec *bvec) -{ - return bvec->bv_len != PAGE_SIZE; -} - static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { int ret = LZO_E_OK; @@ -154,13 +157,13 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned long handle = meta->table[index].handle; if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - memset(mem, 0, PAGE_SIZE); + clear_page(mem); return 0; } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); if (meta->table[index].size == PAGE_SIZE) - memcpy(mem, cmem, PAGE_SIZE); + copy_page(mem, cmem); else ret = lzo1x_decompress_safe(cmem, meta->table[index].size, mem, &clen); @@ -309,11 +312,13 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) + if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { src = kmap_atomic(page); - memcpy(cmem, src, clen); - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) + copy_page(cmem, src); kunmap_atomic(src); + } else { + memcpy(cmem, src, clen); + } zs_unmap_object(meta->mem_pool, handle); -- cgit v1.2.3 From dc18dd5cb1f638af5fc0328677b5d96328769ca3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Fri, 7 Jun 2013 00:07:31 +0800 Subject: zram: use atomic64_xxx() to replace zram_stat64_xxx() Use atomic64_xxx() to replace open-coded zram_stat64_xxx(). Some architectures have native support of atomic64 operations, so we can get rid of the spin_lock() in zram_stat64_xxx(). On the other hand, for platforms use generic version of atomic64 implement, it may cause an extra save/restore of the interrupt flag. So it's a tradeoff. Signed-off-by: Jiang Liu Signed-off-by: Greg Kroah-Hartman (cherry picked from commit da5cc7d338f97886ebf35be92995460289379b73) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 37 ++++++++----------------------------- drivers/staging/zram/zram_drv.h | 19 +++++++++++-------- drivers/staging/zram/zram_sysfs.c | 21 +++++---------------- 3 files changed, 24 insertions(+), 53 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 18a89863ae53..97899eac15df 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -42,25 +42,6 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; -static void zram_stat64_add(struct zram *zram, u64 *v, u64 inc) -{ - spin_lock(&zram->stat64_lock); - *v = *v + inc; - spin_unlock(&zram->stat64_lock); -} - -static void zram_stat64_sub(struct zram *zram, u64 *v, u64 dec) -{ - spin_lock(&zram->stat64_lock); - *v = *v - dec; - spin_unlock(&zram->stat64_lock); -} - -static void zram_stat64_inc(struct zram *zram, u64 *v) -{ - zram_stat64_add(zram, v, 1); -} - static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -120,8 +101,7 @@ static void zram_free_page(struct zram *zram, size_t index) if (size <= PAGE_SIZE / 2) zram->stats.good_compress--; - zram_stat64_sub(zram, &zram->stats.compr_size, - meta->table[index].size); + atomic64_sub(meta->table[index].size, &zram->stats.compr_size); zram->stats.pages_stored--; meta->table[index].handle = 0; @@ -172,7 +152,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret != LZO_E_OK)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); - zram_stat64_inc(zram, &zram->stats.failed_reads); + atomic64_inc(&zram->stats.failed_reads); return ret; } @@ -326,7 +306,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, meta->table[index].size = clen; /* Update stats */ - zram_stat64_add(zram, &zram->stats.compr_size, clen); + atomic64_add(clen, &zram->stats.compr_size); zram->stats.pages_stored++; if (clen <= PAGE_SIZE / 2) zram->stats.good_compress++; @@ -336,7 +316,7 @@ out: kfree(uncmem); if (ret) - zram_stat64_inc(zram, &zram->stats.failed_writes); + atomic64_inc(&zram->stats.failed_writes); return ret; } @@ -373,10 +353,10 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) switch (rw) { case READ: - zram_stat64_inc(zram, &zram->stats.num_reads); + atomic64_inc(&zram->stats.num_reads); break; case WRITE: - zram_stat64_inc(zram, &zram->stats.num_writes); + atomic64_inc(&zram->stats.num_writes); break; } @@ -456,7 +436,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) goto error; if (!valid_io_request(zram, bio)) { - zram_stat64_inc(zram, &zram->stats.invalid_io); + atomic64_inc(&zram->stats.invalid_io); goto error; } @@ -595,7 +575,7 @@ static void zram_slot_free_notify(struct block_device *bdev, down_write(&zram->lock); zram_free_page(zram, index); up_write(&zram->lock); - zram_stat64_inc(zram, &zram->stats.notify_free); + atomic64_inc(&zram->stats.notify_free); } static const struct block_device_operations zram_devops = { @@ -609,7 +589,6 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); - spin_lock_init(&zram->stat64_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index b3a315d1eb23..11b09fc25953 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -69,14 +69,18 @@ struct table { u8 flags; } __aligned(4); +/* + * All 64bit fields should only be manipulated by 64bit atomic accessors. + * All modifications to 32bit counter should be protected by zram->lock. + */ struct zram_stats { - u64 compr_size; /* compressed size of pages stored */ - u64 num_reads; /* failed + successful */ - u64 num_writes; /* --do-- */ - u64 failed_reads; /* should NEVER! happen */ - u64 failed_writes; /* can happen when memory is too low */ - u64 invalid_io; /* non-page-aligned I/O requests */ - u64 notify_free; /* no. of swap slot free notifications */ + atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t num_reads; /* failed + successful */ + atomic64_t num_writes; /* --do-- */ + atomic64_t failed_reads; /* should NEVER! happen */ + atomic64_t failed_writes; /* can happen when memory is too low */ + atomic64_t invalid_io; /* non-page-aligned I/O requests */ + atomic64_t notify_free; /* no. of swap slot free notifications */ u32 pages_zero; /* no. of zero filled pages */ u32 pages_stored; /* no. of pages currently stored */ u32 good_compress; /* % of pages with compression ratio<=50% */ @@ -92,7 +96,6 @@ struct zram_meta { struct zram { struct zram_meta *meta; - spinlock_t stat64_lock; /* protect 64-bit stats */ struct rw_semaphore lock; /* protect compression buffers, table, * 32bit stat counters against concurrent * notifications, reads and writes */ diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c index e239d9452726..93a2f9cafd7c 100644 --- a/drivers/staging/zram/zram_sysfs.c +++ b/drivers/staging/zram/zram_sysfs.c @@ -19,17 +19,6 @@ #include "zram_drv.h" -static u64 zram_stat64_read(struct zram *zram, u64 *v) -{ - u64 val; - - spin_lock(&zram->stat64_lock); - val = *v; - spin_unlock(&zram->stat64_lock); - - return val; -} - static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -116,7 +105,7 @@ static ssize_t num_reads_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_reads)); + (u64)atomic64_read(&zram->stats.num_reads)); } static ssize_t num_writes_show(struct device *dev, @@ -125,7 +114,7 @@ static ssize_t num_writes_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.num_writes)); + (u64)atomic64_read(&zram->stats.num_writes)); } static ssize_t invalid_io_show(struct device *dev, @@ -134,7 +123,7 @@ static ssize_t invalid_io_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.invalid_io)); + (u64)atomic64_read(&zram->stats.invalid_io)); } static ssize_t notify_free_show(struct device *dev, @@ -143,7 +132,7 @@ static ssize_t notify_free_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.notify_free)); + (u64)atomic64_read(&zram->stats.notify_free)); } static ssize_t zero_pages_show(struct device *dev, @@ -169,7 +158,7 @@ static ssize_t compr_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - zram_stat64_read(zram, &zram->stats.compr_size)); + (u64)atomic64_read(&zram->stats.compr_size)); } static ssize_t mem_used_total_show(struct device *dev, -- cgit v1.2.3 From c5544682efcda470167318ac56559f023e578b09 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Sat, 22 Jun 2013 03:21:18 +0300 Subject: zram: remove zram_sysfs file (v2) Move zram sysfs code to zram drv and remove zram_sysfs.c file. This gives ability to make static a number of previously exported zram functions, used from zram sysfs, e.g. internal zram zram_meta_alloc/free(). We also can drop zram_drv wrapper functions, used from zram sysfs: e.g. zram_reset_device()/__zram_reset_device() pair. v2: as suggested by Greg K-H, move MODULE description to the bottom of the file. Signed-off-by: Sergey Senozhatsky Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 9b3bb7abcdf2df0f1b2657e6cbc9d06bc2b3b36f) Signed-off-by: Alex Shi --- drivers/staging/zram/Makefile | 2 +- drivers/staging/zram/zram_drv.c | 516 ++++++++++++++++++++++++++------------ drivers/staging/zram/zram_drv.h | 10 - drivers/staging/zram/zram_sysfs.c | 209 --------------- 4 files changed, 350 insertions(+), 387 deletions(-) delete mode 100644 drivers/staging/zram/zram_sysfs.c (limited to 'drivers') diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile index 7f4a3019e9c4..cb0f9ced6a93 100644 --- a/drivers/staging/zram/Makefile +++ b/drivers/staging/zram/Makefile @@ -1,3 +1,3 @@ -zram-y := zram_drv.o zram_sysfs.o +zram-y := zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 97899eac15df..753877431b5f 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -42,6 +42,104 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +static inline struct zram *dev_to_zram(struct device *dev) +{ + return (struct zram *)dev_to_disk(dev)->private_data; +} + +static ssize_t disksize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", zram->disksize); +} + +static ssize_t initstate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->init_done); +} + +static ssize_t num_reads_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_reads)); +} + +static ssize_t num_writes_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_writes)); +} + +static ssize_t invalid_io_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.invalid_io)); +} + +static ssize_t notify_free_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.notify_free)); +} + +static ssize_t zero_pages_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->stats.pages_zero); +} + +static ssize_t orig_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)(zram->stats.pages_stored) << PAGE_SHIFT); +} + +static ssize_t compr_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.compr_size)); +} + +static ssize_t mem_used_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + down_read(&zram->init_lock); + if (zram->init_done) + val = zs_get_total_size_bytes(meta->mem_pool); + up_read(&zram->init_lock); + + return sprintf(buf, "%llu\n", val); +} + static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -60,6 +158,97 @@ static void zram_clear_flag(struct zram_meta *meta, u32 index, meta->table[index].flags &= ~BIT(flag); } +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + +/* + * Check if request is within bounds and aligned on zram logical blocks. + */ +static inline int valid_io_request(struct zram *zram, struct bio *bio) +{ + u64 start, end, bound; + + /* unaligned request */ + if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) + return 0; + if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) + return 0; + + start = bio->bi_sector; + end = start + (bio->bi_size >> SECTOR_SHIFT); + bound = zram->disksize >> SECTOR_SHIFT; + /* out of range range */ + if (unlikely(start >= bound || end >= bound || start > end)) + return 0; + + /* I/O request is valid */ + return 1; +} + +static void zram_meta_free(struct zram_meta *meta) +{ + zs_destroy_pool(meta->mem_pool); + kfree(meta->compress_workmem); + free_pages((unsigned long)meta->compress_buffer, 1); + vfree(meta->table); + kfree(meta); +} + +static struct zram_meta *zram_meta_alloc(u64 disksize) +{ + size_t num_pages; + struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto out; + + meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!meta->compress_workmem) + goto free_meta; + + meta->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!meta->compress_buffer) { + pr_err("Error allocating compressor buffer space\n"); + goto free_workmem; + } + + num_pages = disksize >> PAGE_SHIFT; + meta->table = vzalloc(num_pages * sizeof(*meta->table)); + if (!meta->table) { + pr_err("Error allocating zram address table\n"); + goto free_buffer; + } + + meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); + if (!meta->mem_pool) { + pr_err("Error creating memory pool\n"); + goto free_table; + } + + return meta; + +free_table: + vfree(meta->table); +free_buffer: + free_pages((unsigned long)meta->compress_buffer, 1); +free_workmem: + kfree(meta->compress_workmem); +free_meta: + kfree(meta); + meta = NULL; +out: + return meta; +} + +static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +{ + if (*offset + bvec->bv_len >= PAGE_SIZE) + (*index)++; + *offset = (*offset + bvec->bv_len) % PAGE_SIZE; +} + static int page_zero_filled(void *ptr) { unsigned int pos; @@ -75,6 +264,21 @@ static int page_zero_filled(void *ptr) return 1; } +static void handle_zero_page(struct bio_vec *bvec) +{ + struct page *page = bvec->bv_page; + void *user_mem; + + user_mem = kmap_atomic(page); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); + kunmap_atomic(user_mem); + + flush_dcache_page(page); +} + static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -108,26 +312,6 @@ static void zram_free_page(struct zram *zram, size_t index) meta->table[index].size = 0; } -static inline int is_partial_io(struct bio_vec *bvec) -{ - return bvec->bv_len != PAGE_SIZE; -} - -static void handle_zero_page(struct bio_vec *bvec) -{ - struct page *page = bvec->bv_page; - void *user_mem; - - user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); - else - clear_page(user_mem); - kunmap_atomic(user_mem); - - flush_dcache_page(page); -} - static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { int ret = LZO_E_OK; @@ -338,11 +522,117 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +static void zram_reset_device(struct zram *zram) { - if (*offset + bvec->bv_len >= PAGE_SIZE) - (*index)++; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; + size_t index; + struct zram_meta *meta; + + if (!zram->init_done) + return; + + meta = zram->meta; + zram->init_done = 0; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { + unsigned long handle = meta->table[index].handle; + if (!handle) + continue; + + zs_free(meta->mem_pool, handle); + } + + zram_meta_free(zram->meta); + zram->meta = NULL; + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + + zram->disksize = 0; + set_capacity(zram->disk, 0); +} + +static void zram_init_device(struct zram *zram, struct zram_meta *meta) +{ + if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %lu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 + ); + } + + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); + + zram->meta = meta; + zram->init_done = 1; + + pr_debug("Initialization done!\n"); +} + +static ssize_t disksize_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 disksize; + struct zram_meta *meta; + struct zram *zram = dev_to_zram(dev); + + disksize = memparse(buf, NULL); + if (!disksize) + return -EINVAL; + + disksize = PAGE_ALIGN(disksize); + meta = zram_meta_alloc(disksize); + down_write(&zram->init_lock); + if (zram->init_done) { + up_write(&zram->init_lock); + zram_meta_free(meta); + pr_info("Cannot change disksize for initialized device\n"); + return -EBUSY; + } + + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + zram_init_device(zram, meta); + up_write(&zram->init_lock); + + return len; +} + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned short do_reset; + struct zram *zram; + struct block_device *bdev; + + zram = dev_to_zram(dev); + bdev = bdget_disk(zram->disk, 0); + + /* Do not reset an active device! */ + if (bdev->bd_holders) + return -EBUSY; + + ret = kstrtou16(buf, 10, &do_reset); + if (ret) + return ret; + + if (!do_reset) + return -EINVAL; + + /* Make sure all pending I/O is finished */ + if (bdev) + fsync_bdev(bdev); + + zram_reset_device(zram); + return len; } static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) @@ -400,30 +690,6 @@ out: bio_io_error(bio); } -/* - * Check if request is within bounds and aligned on zram logical blocks. - */ -static inline int valid_io_request(struct zram *zram, struct bio *bio) -{ - u64 start, end, bound; - - /* unaligned request */ - if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return 0; - if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return 0; - - start = bio->bi_sector; - end = start + (bio->bi_size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range range */ - if (unlikely(start >= bound || end > bound || start > end)) - return 0; - - /* I/O request is valid */ - return 1; -} - /* * Handler function for all zram I/O requests. */ @@ -450,122 +716,6 @@ error: bio_io_error(bio); } -static void __zram_reset_device(struct zram *zram) -{ - size_t index; - struct zram_meta *meta; - - if (!zram->init_done) - return; - - meta = zram->meta; - zram->init_done = 0; - - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - unsigned long handle = meta->table[index].handle; - if (!handle) - continue; - - zs_free(meta->mem_pool, handle); - } - - zram_meta_free(zram->meta); - zram->meta = NULL; - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); - - zram->disksize = 0; - set_capacity(zram->disk, 0); -} - -void zram_reset_device(struct zram *zram) -{ - down_write(&zram->init_lock); - __zram_reset_device(zram); - up_write(&zram->init_lock); -} - -void zram_meta_free(struct zram_meta *meta) -{ - zs_destroy_pool(meta->mem_pool); - kfree(meta->compress_workmem); - free_pages((unsigned long)meta->compress_buffer, 1); - vfree(meta->table); - kfree(meta); -} - -struct zram_meta *zram_meta_alloc(u64 disksize) -{ - size_t num_pages; - struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - goto out; - - meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!meta->compress_workmem) - goto free_meta; - - meta->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!meta->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - goto free_workmem; - } - - num_pages = disksize >> PAGE_SHIFT; - meta->table = vzalloc(num_pages * sizeof(*meta->table)); - if (!meta->table) { - pr_err("Error allocating zram address table\n"); - goto free_buffer; - } - - meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); - if (!meta->mem_pool) { - pr_err("Error creating memory pool\n"); - goto free_table; - } - - return meta; - -free_table: - vfree(meta->table); -free_buffer: - free_pages((unsigned long)meta->compress_buffer, 1); -free_workmem: - kfree(meta->compress_workmem); -free_meta: - kfree(meta); - meta = NULL; -out: - return meta; -} - -void zram_init_device(struct zram *zram, struct zram_meta *meta) -{ - if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %lu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 - ); - } - - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - - zram->meta = meta; - zram->init_done = 1; - - pr_debug("Initialization done!\n"); -} - static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { @@ -583,6 +733,38 @@ static const struct block_device_operations zram_devops = { .owner = THIS_MODULE }; +static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, + disksize_show, disksize_store); +static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); +static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); +static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); +static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); +static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); +static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); +static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); +static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); +static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); +static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); + +static struct attribute *zram_disk_attrs[] = { + &dev_attr_disksize.attr, + &dev_attr_initstate.attr, + &dev_attr_reset.attr, + &dev_attr_num_reads.attr, + &dev_attr_num_writes.attr, + &dev_attr_invalid_io.attr, + &dev_attr_notify_free.attr, + &dev_attr_zero_pages.attr, + &dev_attr_orig_data_size.attr, + &dev_attr_compr_data_size.attr, + &dev_attr_mem_used_total.attr, + NULL, +}; + +static struct attribute_group zram_disk_attr_group = { + .attrs = zram_disk_attrs, +}; + static int create_device(struct zram *zram, int device_id) { int ret = -ENOMEM; @@ -728,12 +910,12 @@ static void __exit zram_exit(void) pr_debug("Cleanup done!\n"); } -module_param(num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); - module_init(zram_init); module_exit(zram_exit); +module_param(num_devices, uint, 0); +MODULE_PARM_DESC(num_devices, "Number of zram devices"); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 11b09fc25953..9e57bfb29b4f 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -112,14 +112,4 @@ struct zram { struct zram_stats stats; }; - -#ifdef CONFIG_SYSFS -extern struct attribute_group zram_disk_attr_group; -#endif - -extern void zram_reset_device(struct zram *zram); -extern struct zram_meta *zram_meta_alloc(u64 disksize); -extern void zram_meta_free(struct zram_meta *meta); -extern void zram_init_device(struct zram *zram, struct zram_meta *meta); - #endif diff --git a/drivers/staging/zram/zram_sysfs.c b/drivers/staging/zram/zram_sysfs.c deleted file mode 100644 index 93a2f9cafd7c..000000000000 --- a/drivers/staging/zram/zram_sysfs.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com/ - */ - -#include -#include -#include -#include - -#include "zram_drv.h" - -static inline struct zram *dev_to_zram(struct device *dev) -{ - return (struct zram *)dev_to_disk(dev)->private_data; -} - -static ssize_t disksize_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", zram->disksize); -} - -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - u64 disksize; - struct zram_meta *meta; - struct zram *zram = dev_to_zram(dev); - - disksize = memparse(buf, NULL); - if (!disksize) - return -EINVAL; - - disksize = PAGE_ALIGN(disksize); - meta = zram_meta_alloc(disksize); - down_write(&zram->init_lock); - if (zram->init_done) { - up_write(&zram->init_lock); - zram_meta_free(meta); - pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; - } - - zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_init_device(zram, meta); - up_write(&zram->init_lock); - - return len; -} - -static ssize_t initstate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->init_done); -} - -static ssize_t reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - unsigned short do_reset; - struct zram *zram; - struct block_device *bdev; - - zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - - /* Do not reset an active device! */ - if (bdev->bd_holders) - return -EBUSY; - - ret = kstrtou16(buf, 10, &do_reset); - if (ret) - return ret; - - if (!do_reset) - return -EINVAL; - - /* Make sure all pending I/O is finished */ - if (bdev) - fsync_bdev(bdev); - - zram_reset_device(zram); - return len; -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->stats.pages_zero); -} - -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); -} - -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_size)); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; - - down_read(&zram->init_lock); - if (zram->init_done) - val = zs_get_total_size_bytes(meta->mem_pool); - up_read(&zram->init_lock); - - return sprintf(buf, "%llu\n", val); -} - -static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, - disksize_show, disksize_store); -static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); -static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); -static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); -static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); - -static struct attribute *zram_disk_attrs[] = { - &dev_attr_disksize.attr, - &dev_attr_initstate.attr, - &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, - NULL, -}; - -struct attribute_group zram_disk_attr_group = { - .attrs = zram_disk_attrs, -}; -- cgit v1.2.3 From 687f091b39ef238b975b6ad4e4e282ee9aade598 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 26 Jun 2013 15:28:39 +0300 Subject: staging: zram: protect zram_reset_device() call Commit 9b3bb7abcdf2df0f1b2657e6cbc9d06bc2b3b36f (remove zram_sysfs file (v2)) accidentally made zram_reset_device() racy. Protect zram_reset_device() call with zram->lock. Signed-off-by: Sergey Senozhatsky Acked-by: Jerome Marchand Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 644d478793c6594277f8ae76954da4ace7ac6f96) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 753877431b5f..c549e3940bcf 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -527,8 +527,11 @@ static void zram_reset_device(struct zram *zram) size_t index; struct zram_meta *meta; - if (!zram->init_done) + down_write(&zram->init_lock); + if (!zram->init_done) { + up_write(&zram->init_lock); return; + } meta = zram->meta; zram->init_done = 0; @@ -549,6 +552,7 @@ static void zram_reset_device(struct zram *zram) zram->disksize = 0; set_capacity(zram->disk, 0); + up_write(&zram->init_lock); } static void zram_init_device(struct zram *zram, struct zram_meta *meta) -- cgit v1.2.3 From 068e927e51265b16110951101506d112ef38fb36 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 12 Jul 2013 14:20:52 -0400 Subject: staging: zram: Add auto loading of module if user opens /dev/zram. Greg spotted that said driver is not subscribing to the automagic mechanism of auto-loading if a user tries to open /dev/zram. This fixes it. CC: Minchan Kim Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit c70bda992c12e593e411c02a52e4bd6985407539) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index c549e3940bcf..77f40a7a6726 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -923,3 +923,4 @@ MODULE_PARM_DESC(num_devices, "Number of zram devices"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); +MODULE_ALIAS("devname:zram"); -- cgit v1.2.3 From 1da6589d327fdfbb2c91551ea9047e598b4115f5 Mon Sep 17 00:00:00 2001 From: Sunghan Suh Date: Wed, 3 Jul 2013 20:10:05 +0900 Subject: zram: prevent data loss in error cases of function zram_bvec_write() In function zram_bvec_write(), previous data at the index is already freed by function zram_free_page(). When failed to compress or zs_malloc, there is no way to restore old data. Therefore, free previous data when it's about to update. Also, no need to check whether table is not empty outside of function zram_free_page(), because the function properly checks inside. Signed-off-by: Sunghan Suh Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f40ac2ae1b506484dd9261a24bbf3e86b2206ff8) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 77f40a7a6726..84df3999d6af 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -418,14 +418,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - /* - * System overwrites unused sectors. Free memory associated - * with this sector now. - */ - if (meta->table[index].handle || - zram_test_flag(meta, index, ZRAM_ZERO)) - zram_free_page(zram, index); - user_mem = kmap_atomic(page); if (is_partial_io(bvec)) { @@ -439,6 +431,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); + /* Free memory associated with this sector now. */ + zram_free_page(zram, index); + zram->stats.pages_zero++; zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; @@ -486,6 +481,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, zs_unmap_object(meta->mem_pool, handle); + /* + * Free memory associated with this sector + * before overwriting unused sectors. + */ + zram_free_page(zram, index); + meta->table[index].handle = handle; meta->table[index].size = clen; -- cgit v1.2.3 From b237ffc2780c1c6d082930835e383143a51ea405 Mon Sep 17 00:00:00 2001 From: Kumar Gaurav Date: Thu, 8 Aug 2013 23:53:24 +0530 Subject: Staging: zram: zram_drv.c: Fixed Error of trailing whitespace Fixed by removing trailing whitespace Signed-off-by: Kumar Gaurav Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a539c72a195c081d950475c2945cb82d80be9b66) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 84df3999d6af..35d536a11395 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -169,7 +169,7 @@ static inline int is_partial_io(struct bio_vec *bvec) static inline int valid_io_request(struct zram *zram, struct bio *bio) { u64 start, end, bound; - + /* unaligned request */ if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) return 0; -- cgit v1.2.3 From 5fc58bd448ae1b2bafd83fcb5e2c0d65fcee2c37 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 12 Aug 2013 15:13:55 +0900 Subject: zram: fix invalid memory access [1] tried to fix invalid memory access on zram->disk but it didn't fix properly because get_disk failed during module exit path. Actually, we don't need to reset zram->disk's capacity to zero in module exit path so that this patch introduces new argument "reset_capacity" on zram_reset_divice and it only reset it when reset_store is called. [1] 6030ea9b, zram: avoid invalid memory access in zram_exit() Cc: Nitin Gupta Cc: Jiang Liu Cc: stable@vger.kernel.org Signed-off-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 2b86ab9cc29fcd435cde9378c3b9ffe8b5c76128) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 35d536a11395..255d512763f2 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -523,7 +523,7 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } -static void zram_reset_device(struct zram *zram) +static void zram_reset_device(struct zram *zram, bool reset_capacity) { size_t index; struct zram_meta *meta; @@ -552,7 +552,8 @@ static void zram_reset_device(struct zram *zram) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - set_capacity(zram->disk, 0); + if (reset_capacity) + set_capacity(zram->disk, 0); up_write(&zram->init_lock); } @@ -636,7 +637,7 @@ static ssize_t reset_store(struct device *dev, if (bdev) fsync_bdev(bdev); - zram_reset_device(zram); + zram_reset_device(zram, true); return len; } @@ -903,10 +904,12 @@ static void __exit zram_exit(void) for (i = 0; i < num_devices; i++) { zram = &zram_devices[i]; - get_disk(zram->disk); destroy_device(zram); - zram_reset_device(zram); - put_disk(zram->disk); + /* + * Shouldn't access zram->disk after destroy_device + * because destroy_device already released zram->disk. + */ + zram_reset_device(zram, false); } unregister_blkdev(zram_major, "zram"); -- cgit v1.2.3 From 92fe27ba3d29df5971bec2f2b55ff350194772a4 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 12 Aug 2013 15:13:56 +0900 Subject: zram: don't grab mutex in zram_slot_free_noity [1] introduced down_write in zram_slot_free_notify to prevent race between zram_slot_free_notify and zram_bvec_[read|write]. The race could happen if somebody who has right permission to open swap device is reading swap device while it is used by swap in parallel. However, zram_slot_free_notify is called with holding spin_lock of swap layer so we shouldn't avoid holing mutex. Otherwise, lockdep warns it. This patch adds new list to handle free slot and workqueue so zram_slot_free_notify just registers slot index to be freed and registers the request to workqueue. If workqueue is expired, it holds mutex_lock so there is no problem any more. If any I/O is issued, zram handles pending slot-free request caused by zram_slot_free_notify right before handling issued request because workqueue wouldn't be expired yet so zram I/O request handling function can miss it. Lastly, when zram is reset, flush_work could handle all of pending free request so we shouldn't have memory leak. NOTE: If zram_slot_free_notify's kmalloc with GFP_ATOMIC would be failed, the slot will be freed when next write I/O write the slot. [1] [57ab0485, zram: use zram->lock to protect zram_free_page() in swap free notify path] * from v2 * refactoring * from v1 * totally redesign Cc: Nitin Gupta Cc: Jiang Liu Cc: stable@vger.kernel.org Signed-off-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from commit a0c516cbfc7452c8cbd564525fef66d9f20b46d1) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 60 ++++++++++++++++++++++++++++++++++++++--- drivers/staging/zram/zram_drv.h | 10 +++++++ 2 files changed, 67 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 255d512763f2..3d08ff11e700 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -440,6 +440,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } + /* + * zram_slot_free_notify could miss free so that let's + * double check. + */ + if (unlikely(meta->table[index].handle || + zram_test_flag(meta, index, ZRAM_ZERO))) + zram_free_page(zram, index); + ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); @@ -505,6 +513,20 @@ out: return ret; } +static void handle_pending_slot_free(struct zram *zram) +{ + struct zram_slot_free *free_rq; + + spin_lock(&zram->slot_free_lock); + while (zram->slot_free_rq) { + free_rq = zram->slot_free_rq; + zram->slot_free_rq = free_rq->next; + zram_free_page(zram, free_rq->index); + kfree(free_rq); + } + spin_unlock(&zram->slot_free_lock); +} + static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio, int rw) { @@ -512,10 +534,12 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (rw == READ) { down_read(&zram->lock); + handle_pending_slot_free(zram); ret = zram_bvec_read(zram, bvec, index, offset, bio); up_read(&zram->lock); } else { down_write(&zram->lock); + handle_pending_slot_free(zram); ret = zram_bvec_write(zram, bvec, index, offset); up_write(&zram->lock); } @@ -528,6 +552,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) size_t index; struct zram_meta *meta; + flush_work(&zram->free_work); + down_write(&zram->init_lock); if (!zram->init_done) { up_write(&zram->init_lock); @@ -722,16 +748,40 @@ error: bio_io_error(bio); } +static void zram_slot_free(struct work_struct *work) +{ + struct zram *zram; + + zram = container_of(work, struct zram, free_work); + down_write(&zram->lock); + handle_pending_slot_free(zram); + up_write(&zram->lock); +} + +static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) +{ + spin_lock(&zram->slot_free_lock); + free_rq->next = zram->slot_free_rq; + zram->slot_free_rq = free_rq; + spin_unlock(&zram->slot_free_lock); +} + static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; + struct zram_slot_free *free_rq; zram = bdev->bd_disk->private_data; - down_write(&zram->lock); - zram_free_page(zram, index); - up_write(&zram->lock); atomic64_inc(&zram->stats.notify_free); + + free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); + if (!free_rq) + return; + + free_rq->index = index; + add_slot_free(zram, free_rq); + schedule_work(&zram->free_work); } static const struct block_device_operations zram_devops = { @@ -778,6 +828,10 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); + INIT_WORK(&zram->free_work, zram_slot_free); + spin_lock_init(&zram->slot_free_lock); + zram->slot_free_rq = NULL; + zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 9e57bfb29b4f..97a3acf6ab76 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -94,11 +94,20 @@ struct zram_meta { struct zs_pool *mem_pool; }; +struct zram_slot_free { + unsigned long index; + struct zram_slot_free *next; +}; + struct zram { struct zram_meta *meta; struct rw_semaphore lock; /* protect compression buffers, table, * 32bit stat counters against concurrent * notifications, reads and writes */ + + struct work_struct free_work; /* handle pending free request */ + struct zram_slot_free *slot_free_rq; /* list head of free request */ + struct request_queue *queue; struct gendisk *disk; int init_done; @@ -109,6 +118,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ + spinlock_t slot_free_lock; struct zram_stats stats; }; -- cgit v1.2.3 From e2671233fa7df020b7ea720db33f1237c418f3ab Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 12 Sep 2013 15:41:31 -0700 Subject: Revert "staging: zram: Add auto loading of module if user opens /dev/zram." This reverts commit c70bda992c12e593e411c02a52e4bd6985407539. It's incorrect, Kay writes: Please just remove it. "devname" is meant to be used for single-instance devices with a static dev_t, never for things like zramX. It will not do anything useful here, it does nothing really without a statically assigned dev_t, and it should not be used for devices of this kind anyway. Reported-by: Tom Gundersen Reported-by: Kay Sievers Cc: Minchan Kim Cc: Konrad Rzeszutek Wilk Signed-off-by: Greg Kroah-Hartman (cherry picked from commit f0f65a95de2840db3fa61c953dca267e7b773168) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 3d08ff11e700..7d8ff31f67f2 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -981,4 +981,3 @@ MODULE_PARM_DESC(num_devices, "Number of zram devices"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Nitin Gupta "); MODULE_DESCRIPTION("Compressed RAM Block Device"); -MODULE_ALIAS("devname:zram"); -- cgit v1.2.3 From 922959916229ecc5f143b0fe66a9a8bbf4b55169 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 30 Oct 2013 18:43:32 +0530 Subject: Staging: zram: Fix variable dereferenced before check This patch fixes the following Smatch warning in zram_drv.c- drivers/staging/zram/zram_drv.c:899 destroy_device() warn: variable dereferenced before check 'zram->disk' (see line 896) Acked-by: Minchan Kim Acked-by: Jerome Marchand Signed-off-by: Rashika Kheria Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 59d3fe540454dd8fc48d4eda44e200f9c98bef10) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 7d8ff31f67f2..a125cfae6942 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -896,13 +896,10 @@ static void destroy_device(struct zram *zram) sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, &zram_disk_attr_group); - if (zram->disk) { - del_gendisk(zram->disk); - put_disk(zram->disk); - } + del_gendisk(zram->disk); + put_disk(zram->disk); - if (zram->queue) - blk_cleanup_queue(zram->queue); + blk_cleanup_queue(zram->queue); } static int __init zram_init(void) -- cgit v1.2.3 From 3c073fe1e7f533e087a8e2faaff31f8b02e6aed9 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Wed, 30 Oct 2013 18:36:32 +0530 Subject: Staging: zram: Fix access of NULL pointer This patch fixes the bug in reset_store caused by accessing NULL pointer. The bdev gets its value from bdget_disk() which could fail when memory pressure is severe and hence can return NULL because allocation of inode in bdget could fail. Hence, this patch introduces a check for bdev to prevent reference to a NULL pointer in the later part of the code. It also removes unnecessary check of bdev for fsync_bdev(). Cc: stable Acked-by: Jerome Marchand Signed-off-by: Rashika Kheria Acked-by: Minchan Kim Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 46a51c80216cb891f271ad021f59009f34677499) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index a125cfae6942..206f59d9a7a8 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -648,6 +648,9 @@ static ssize_t reset_store(struct device *dev, zram = dev_to_zram(dev); bdev = bdget_disk(zram->disk, 0); + if (!bdev) + return -ENOMEM; + /* Do not reset an active device! */ if (bdev->bd_holders) return -EBUSY; @@ -660,8 +663,7 @@ static ssize_t reset_store(struct device *dev, return -EINVAL; /* Make sure all pending I/O is finished */ - if (bdev) - fsync_bdev(bdev); + fsync_bdev(bdev); zram_reset_device(zram, true); return len; -- cgit v1.2.3 From 2b299eb831324558d68be874fa1c9d65a8cfe5f3 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Sun, 10 Nov 2013 22:13:53 +0530 Subject: Staging: zram: Fix memory leak by refcount mismatch As suggested by Minchan Kim and Jerome Marchand "The code in reset_store get the block device (bdget_disk()) but it does not put it (bdput()) when it's done using it. The usage count is therefore incremented but never decremented." This patch also puts bdput() for all error cases. Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: stable@vger.kernel.org Signed-off-by: Rashika Kheria Signed-off-by: Greg Kroah-Hartman (cherry picked from commit 1b672224d128ec2570eb37572ff803cfe452b4f7) Signed-off-by: Alex Shi --- drivers/staging/zram/zram_drv.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c index 206f59d9a7a8..689ebf105acd 100644 --- a/drivers/staging/zram/zram_drv.c +++ b/drivers/staging/zram/zram_drv.c @@ -652,21 +652,30 @@ static ssize_t reset_store(struct device *dev, return -ENOMEM; /* Do not reset an active device! */ - if (bdev->bd_holders) - return -EBUSY; + if (bdev->bd_holders) { + ret = -EBUSY; + goto out; + } ret = kstrtou16(buf, 10, &do_reset); if (ret) - return ret; + goto out; - if (!do_reset) - return -EINVAL; + if (!do_reset) { + ret = -EINVAL; + goto out; + } /* Make sure all pending I/O is finished */ fsync_bdev(bdev); + bdput(bdev); zram_reset_device(zram, true); return len; + +out: + bdput(bdev); + return ret; } static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) -- cgit v1.2.3 From 03dc2ac5b10ee9ab68090a486d54a7d53492c86d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:50 -0800 Subject: zsmalloc: move it under mm This patch moves zsmalloc under mm directory. Before that, description will explain why we have needed custom allocator. Zsmalloc is a new slab-based memory allocator for storing compressed pages. It is designed for low fragmentation and high allocation success rate on large object, but <= PAGE_SIZE allocations. zsmalloc differs from the kernel slab allocator in two primary ways to achieve these design goals. zsmalloc never requires high order page allocations to back slabs, or "size classes" in zsmalloc terms. Instead it allows multiple single-order pages to be stitched together into a "zspage" which backs the slab. This allows for higher allocation success rate under memory pressure. Also, zsmalloc allows objects to span page boundaries within the zspage. This allows for lower fragmentation than could be had with the kernel slab allocator for objects between PAGE_SIZE/2 and PAGE_SIZE. With the kernel slab allocator, if a page compresses to 60% of it original size, the memory savings gained through compression is lost in fragmentation because another object of the same size can't be stored in the leftover space. This ability to span pages results in zsmalloc allocations not being directly addressable by the user. The user is given an non-dereferencable handle in response to an allocation request. That handle must be mapped, using zs_map_object(), which returns a pointer to the mapped region that can be used. The mapping is necessary since the object data may reside in two different noncontigious pages. The zsmalloc fulfills the allocation needs for zram perfectly [sjenning@linux.vnet.ibm.com: borrow Seth's quote] Signed-off-by: Minchan Kim Acked-by: Nitin Gupta Reviewed-by: Konrad Rzeszutek Wilk Cc: Bob Liu Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Jens Axboe Cc: Luigi Semenzato Cc: Mel Gorman Cc: Pekka Enberg Cc: Rik van Riel Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit bcf1647d0899666f0fb90d176abf63bae22abb7c) Signed-off-by: Alex Shi Conflicts: drivers/staging/zsmalloc/Kconfig mm/Kconfig mm/Makefile Conflicts solutions: only move zsmalloc to mm/, skip unrelated cma/zbud/zswap --- drivers/staging/Kconfig | 2 - drivers/staging/Makefile | 1 - drivers/staging/zram/zram_drv.h | 3 +- drivers/staging/zsmalloc/Makefile | 3 - drivers/staging/zsmalloc/zsmalloc-main.c | 1073 ------------------------------ drivers/staging/zsmalloc/zsmalloc.h | 43 -- 6 files changed, 1 insertion(+), 1124 deletions(-) delete mode 100644 drivers/staging/zsmalloc/Makefile delete mode 100644 drivers/staging/zsmalloc/zsmalloc-main.c delete mode 100644 drivers/staging/zsmalloc/zsmalloc.h (limited to 'drivers') diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index aefe820a8005..60585217481f 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -72,8 +72,6 @@ source "drivers/staging/sep/Kconfig" source "drivers/staging/iio/Kconfig" -source "drivers/staging/zsmalloc/Kconfig" - source "drivers/staging/zram/Kconfig" source "drivers/staging/wlags49_h2/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 415772ea306d..29aaeaa283eb 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -31,7 +31,6 @@ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ obj-$(CONFIG_ZRAM) += zram/ -obj-$(CONFIG_ZSMALLOC) += zsmalloc/ obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ obj-$(CONFIG_FB_SM7XX) += sm7xxfb/ diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h index 97a3acf6ab76..d8f6596513c3 100644 --- a/drivers/staging/zram/zram_drv.h +++ b/drivers/staging/zram/zram_drv.h @@ -17,8 +17,7 @@ #include #include - -#include "../zsmalloc/zsmalloc.h" +#include /* * Some arbitrary value. This is just to catch diff --git a/drivers/staging/zsmalloc/Makefile b/drivers/staging/zsmalloc/Makefile deleted file mode 100644 index b134848a590d..000000000000 --- a/drivers/staging/zsmalloc/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -zsmalloc-y := zsmalloc-main.o - -obj-$(CONFIG_ZSMALLOC) += zsmalloc.o diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c deleted file mode 100644 index 288f58252a18..000000000000 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ /dev/null @@ -1,1073 +0,0 @@ -/* - * zsmalloc memory allocator - * - * Copyright (C) 2011 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the license that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - - -/* - * This allocator is designed for use with zcache and zram. Thus, the - * allocator is supposed to work well under low memory conditions. In - * particular, it never attempts higher order page allocation which is - * very likely to fail under memory pressure. On the other hand, if we - * just use single (0-order) pages, it would suffer from very high - * fragmentation -- any object of size PAGE_SIZE/2 or larger would occupy - * an entire page. This was one of the major issues with its predecessor - * (xvmalloc). - * - * To overcome these issues, zsmalloc allocates a bunch of 0-order pages - * and links them together using various 'struct page' fields. These linked - * pages act as a single higher-order page i.e. an object can span 0-order - * page boundaries. The code refers to these linked pages as a single entity - * called zspage. - * - * Following is how we use various fields and flags of underlying - * struct page(s) to form a zspage. - * - * Usage of struct page fields: - * page->first_page: points to the first component (0-order) page - * page->index (union with page->freelist): offset of the first object - * starting in this page. For the first page, this is - * always 0, so we use this field (aka freelist) to point - * to the first free object in zspage. - * page->lru: links together all component pages (except the first page) - * of a zspage - * - * For _first_ page only: - * - * page->private (union with page->first_page): refers to the - * component page after the first page - * page->freelist: points to the first free object in zspage. - * Free objects are linked together using in-place - * metadata. - * page->objects: maximum number of objects we can store in this - * zspage (class->zspage_order * PAGE_SIZE / class->size) - * page->lru: links together first pages of various zspages. - * Basically forming list of zspages in a fullness group. - * page->mapping: class index and fullness group of the zspage - * - * Usage of struct page flags: - * PG_private: identifies the first component page - * PG_private2: identifies the last component page - * - */ - -#ifdef CONFIG_ZSMALLOC_DEBUG -#define DEBUG -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zsmalloc.h" - -/* - * This must be power of 2 and greater than of equal to sizeof(link_free). - * These two conditions ensure that any 'struct link_free' itself doesn't - * span more than 1 page which avoids complex case of mapping 2 pages simply - * to restore link_free pointer values. - */ -#define ZS_ALIGN 8 - -/* - * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single) - * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N. - */ -#define ZS_MAX_ZSPAGE_ORDER 2 -#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) - -/* - * Object location (, ) is encoded as - * as single (void *) handle value. - * - * Note that object index is relative to system - * page it is stored in, so for each sub-page belonging - * to a zspage, obj_idx starts with 0. - * - * This is made more complicated by various memory models and PAE. - */ - -#ifndef MAX_PHYSMEM_BITS -#ifdef CONFIG_HIGHMEM64G -#define MAX_PHYSMEM_BITS 36 -#else /* !CONFIG_HIGHMEM64G */ -/* - * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just - * be PAGE_SHIFT - */ -#define MAX_PHYSMEM_BITS BITS_PER_LONG -#endif -#endif -#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) -#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) -#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) - -#define MAX(a, b) ((a) >= (b) ? (a) : (b)) -/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ -#define ZS_MIN_ALLOC_SIZE \ - MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) -#define ZS_MAX_ALLOC_SIZE PAGE_SIZE - -/* - * On systems with 4K page size, this gives 254 size classes! There is a - * trader-off here: - * - Large number of size classes is potentially wasteful as free page are - * spread across these classes - * - Small number of size classes causes large internal fragmentation - * - Probably its better to use specific size classes (empirically - * determined). NOTE: all those class sizes must be set as multiple of - * ZS_ALIGN to make sure link_free itself never has to span 2 pages. - * - * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN - * (reason above) - */ -#define ZS_SIZE_CLASS_DELTA (PAGE_SIZE >> 8) -#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \ - ZS_SIZE_CLASS_DELTA + 1) - -/* - * We do not maintain any list for completely empty or full pages - */ -enum fullness_group { - ZS_ALMOST_FULL, - ZS_ALMOST_EMPTY, - _ZS_NR_FULLNESS_GROUPS, - - ZS_EMPTY, - ZS_FULL -}; - -/* - * We assign a page to ZS_ALMOST_EMPTY fullness group when: - * n <= N / f, where - * n = number of allocated objects - * N = total number of objects zspage can store - * f = 1/fullness_threshold_frac - * - * Similarly, we assign zspage to: - * ZS_ALMOST_FULL when n > N / f - * ZS_EMPTY when n == 0 - * ZS_FULL when n == N - * - * (see: fix_fullness_group()) - */ -static const int fullness_threshold_frac = 4; - -struct size_class { - /* - * Size of objects stored in this class. Must be multiple - * of ZS_ALIGN. - */ - int size; - unsigned int index; - - /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ - int pages_per_zspage; - - spinlock_t lock; - - /* stats */ - u64 pages_allocated; - - struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; -}; - -/* - * Placed within free objects to form a singly linked list. - * For every zspage, first_page->freelist gives head of this list. - * - * This must be power of 2 and less than or equal to ZS_ALIGN - */ -struct link_free { - /* Handle of next free chunk (encodes ) */ - void *next; -}; - -struct zs_pool { - struct size_class size_class[ZS_SIZE_CLASSES]; - - gfp_t flags; /* allocation flags used when growing pool */ -}; - -/* - * A zspage's class index and fullness group - * are encoded in its (first)page->mapping - */ -#define CLASS_IDX_BITS 28 -#define FULLNESS_BITS 4 -#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1) -#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1) - -/* - * By default, zsmalloc uses a copy-based object mapping method to access - * allocations that span two pages. However, if a particular architecture - * performs VM mapping faster than copying, then it should be added here - * so that USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use - * page table mapping rather than copying for object mapping. -*/ -#if defined(CONFIG_ARM) && !defined(MODULE) -#define USE_PGTABLE_MAPPING -#endif - -struct mapping_area { -#ifdef USE_PGTABLE_MAPPING - struct vm_struct *vm; /* vm area for mapping object that span pages */ -#else - char *vm_buf; /* copy buffer for objects that span pages */ -#endif - char *vm_addr; /* address of kmap_atomic()'ed pages */ - enum zs_mapmode vm_mm; /* mapping mode */ -}; - - -/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ -static DEFINE_PER_CPU(struct mapping_area, zs_map_area); - -static int is_first_page(struct page *page) -{ - return PagePrivate(page); -} - -static int is_last_page(struct page *page) -{ - return PagePrivate2(page); -} - -static void get_zspage_mapping(struct page *page, unsigned int *class_idx, - enum fullness_group *fullness) -{ - unsigned long m; - BUG_ON(!is_first_page(page)); - - m = (unsigned long)page->mapping; - *fullness = m & FULLNESS_MASK; - *class_idx = (m >> FULLNESS_BITS) & CLASS_IDX_MASK; -} - -static void set_zspage_mapping(struct page *page, unsigned int class_idx, - enum fullness_group fullness) -{ - unsigned long m; - BUG_ON(!is_first_page(page)); - - m = ((class_idx & CLASS_IDX_MASK) << FULLNESS_BITS) | - (fullness & FULLNESS_MASK); - page->mapping = (struct address_space *)m; -} - -static int get_size_class_index(int size) -{ - int idx = 0; - - if (likely(size > ZS_MIN_ALLOC_SIZE)) - idx = DIV_ROUND_UP(size - ZS_MIN_ALLOC_SIZE, - ZS_SIZE_CLASS_DELTA); - - return idx; -} - -static enum fullness_group get_fullness_group(struct page *page) -{ - int inuse, max_objects; - enum fullness_group fg; - BUG_ON(!is_first_page(page)); - - inuse = page->inuse; - max_objects = page->objects; - - if (inuse == 0) - fg = ZS_EMPTY; - else if (inuse == max_objects) - fg = ZS_FULL; - else if (inuse <= max_objects / fullness_threshold_frac) - fg = ZS_ALMOST_EMPTY; - else - fg = ZS_ALMOST_FULL; - - return fg; -} - -static void insert_zspage(struct page *page, struct size_class *class, - enum fullness_group fullness) -{ - struct page **head; - - BUG_ON(!is_first_page(page)); - - if (fullness >= _ZS_NR_FULLNESS_GROUPS) - return; - - head = &class->fullness_list[fullness]; - if (*head) - list_add_tail(&page->lru, &(*head)->lru); - - *head = page; -} - -static void remove_zspage(struct page *page, struct size_class *class, - enum fullness_group fullness) -{ - struct page **head; - - BUG_ON(!is_first_page(page)); - - if (fullness >= _ZS_NR_FULLNESS_GROUPS) - return; - - head = &class->fullness_list[fullness]; - BUG_ON(!*head); - if (list_empty(&(*head)->lru)) - *head = NULL; - else if (*head == page) - *head = (struct page *)list_entry((*head)->lru.next, - struct page, lru); - - list_del_init(&page->lru); -} - -static enum fullness_group fix_fullness_group(struct zs_pool *pool, - struct page *page) -{ - int class_idx; - struct size_class *class; - enum fullness_group currfg, newfg; - - BUG_ON(!is_first_page(page)); - - get_zspage_mapping(page, &class_idx, &currfg); - newfg = get_fullness_group(page); - if (newfg == currfg) - goto out; - - class = &pool->size_class[class_idx]; - remove_zspage(page, class, currfg); - insert_zspage(page, class, newfg); - set_zspage_mapping(page, class_idx, newfg); - -out: - return newfg; -} - -/* - * We have to decide on how many pages to link together - * to form a zspage for each size class. This is important - * to reduce wastage due to unusable space left at end of - * each zspage which is given as: - * wastage = Zp - Zp % size_class - * where Zp = zspage size = k * PAGE_SIZE where k = 1, 2, ... - * - * For example, for size class of 3/8 * PAGE_SIZE, we should - * link together 3 PAGE_SIZE sized pages to form a zspage - * since then we can perfectly fit in 8 such objects. - */ -static int get_pages_per_zspage(int class_size) -{ - int i, max_usedpc = 0; - /* zspage order which gives maximum used size per KB */ - int max_usedpc_order = 1; - - for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) { - int zspage_size; - int waste, usedpc; - - zspage_size = i * PAGE_SIZE; - waste = zspage_size % class_size; - usedpc = (zspage_size - waste) * 100 / zspage_size; - - if (usedpc > max_usedpc) { - max_usedpc = usedpc; - max_usedpc_order = i; - } - } - - return max_usedpc_order; -} - -/* - * A single 'zspage' is composed of many system pages which are - * linked together using fields in struct page. This function finds - * the first/head page, given any component page of a zspage. - */ -static struct page *get_first_page(struct page *page) -{ - if (is_first_page(page)) - return page; - else - return page->first_page; -} - -static struct page *get_next_page(struct page *page) -{ - struct page *next; - - if (is_last_page(page)) - next = NULL; - else if (is_first_page(page)) - next = (struct page *)page->private; - else - next = list_entry(page->lru.next, struct page, lru); - - return next; -} - -/* - * Encode as a single handle value. - * On hardware platforms with physical memory starting at 0x0 the pfn - * could be 0 so we ensure that the handle will never be 0 by adjusting the - * encoded obj_idx value before encoding. - */ -static void *obj_location_to_handle(struct page *page, unsigned long obj_idx) -{ - unsigned long handle; - - if (!page) { - BUG_ON(obj_idx); - return NULL; - } - - handle = page_to_pfn(page) << OBJ_INDEX_BITS; - handle |= ((obj_idx + 1) & OBJ_INDEX_MASK); - - return (void *)handle; -} - -/* - * Decode pair from the given object handle. We adjust the - * decoded obj_idx back to its original value since it was adjusted in - * obj_location_to_handle(). - */ -static void obj_handle_to_location(unsigned long handle, struct page **page, - unsigned long *obj_idx) -{ - *page = pfn_to_page(handle >> OBJ_INDEX_BITS); - *obj_idx = (handle & OBJ_INDEX_MASK) - 1; -} - -static unsigned long obj_idx_to_offset(struct page *page, - unsigned long obj_idx, int class_size) -{ - unsigned long off = 0; - - if (!is_first_page(page)) - off = page->index; - - return off + obj_idx * class_size; -} - -static void reset_page(struct page *page) -{ - clear_bit(PG_private, &page->flags); - clear_bit(PG_private_2, &page->flags); - set_page_private(page, 0); - page->mapping = NULL; - page->freelist = NULL; - page_mapcount_reset(page); -} - -static void free_zspage(struct page *first_page) -{ - struct page *nextp, *tmp, *head_extra; - - BUG_ON(!is_first_page(first_page)); - BUG_ON(first_page->inuse); - - head_extra = (struct page *)page_private(first_page); - - reset_page(first_page); - __free_page(first_page); - - /* zspage with only 1 system page */ - if (!head_extra) - return; - - list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) { - list_del(&nextp->lru); - reset_page(nextp); - __free_page(nextp); - } - reset_page(head_extra); - __free_page(head_extra); -} - -/* Initialize a newly allocated zspage */ -static void init_zspage(struct page *first_page, struct size_class *class) -{ - unsigned long off = 0; - struct page *page = first_page; - - BUG_ON(!is_first_page(first_page)); - while (page) { - struct page *next_page; - struct link_free *link; - unsigned int i, objs_on_page; - - /* - * page->index stores offset of first object starting - * in the page. For the first page, this is always 0, - * so we use first_page->index (aka ->freelist) to store - * head of corresponding zspage's freelist. - */ - if (page != first_page) - page->index = off; - - link = (struct link_free *)kmap_atomic(page) + - off / sizeof(*link); - objs_on_page = (PAGE_SIZE - off) / class->size; - - for (i = 1; i <= objs_on_page; i++) { - off += class->size; - if (off < PAGE_SIZE) { - link->next = obj_location_to_handle(page, i); - link += class->size / sizeof(*link); - } - } - - /* - * We now come to the last (full or partial) object on this - * page, which must point to the first object on the next - * page (if present) - */ - next_page = get_next_page(page); - link->next = obj_location_to_handle(next_page, 0); - kunmap_atomic(link); - page = next_page; - off = (off + class->size) % PAGE_SIZE; - } -} - -/* - * Allocate a zspage for the given size class - */ -static struct page *alloc_zspage(struct size_class *class, gfp_t flags) -{ - int i, error; - struct page *first_page = NULL, *uninitialized_var(prev_page); - - /* - * Allocate individual pages and link them together as: - * 1. first page->private = first sub-page - * 2. all sub-pages are linked together using page->lru - * 3. each sub-page is linked to the first page using page->first_page - * - * For each size class, First/Head pages are linked together using - * page->lru. Also, we set PG_private to identify the first page - * (i.e. no other sub-page has this flag set) and PG_private_2 to - * identify the last page. - */ - error = -ENOMEM; - for (i = 0; i < class->pages_per_zspage; i++) { - struct page *page; - - page = alloc_page(flags); - if (!page) - goto cleanup; - - INIT_LIST_HEAD(&page->lru); - if (i == 0) { /* first page */ - SetPagePrivate(page); - set_page_private(page, 0); - first_page = page; - first_page->inuse = 0; - } - if (i == 1) - first_page->private = (unsigned long)page; - if (i >= 1) - page->first_page = first_page; - if (i >= 2) - list_add(&page->lru, &prev_page->lru); - if (i == class->pages_per_zspage - 1) /* last page */ - SetPagePrivate2(page); - prev_page = page; - } - - init_zspage(first_page, class); - - first_page->freelist = obj_location_to_handle(first_page, 0); - /* Maximum number of objects we can store in this zspage */ - first_page->objects = class->pages_per_zspage * PAGE_SIZE / class->size; - - error = 0; /* Success */ - -cleanup: - if (unlikely(error) && first_page) { - free_zspage(first_page); - first_page = NULL; - } - - return first_page; -} - -static struct page *find_get_zspage(struct size_class *class) -{ - int i; - struct page *page; - - for (i = 0; i < _ZS_NR_FULLNESS_GROUPS; i++) { - page = class->fullness_list[i]; - if (page) - break; - } - - return page; -} - -#ifdef USE_PGTABLE_MAPPING -static inline int __zs_cpu_up(struct mapping_area *area) -{ - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm) - return 0; - area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL); - if (!area->vm) - return -ENOMEM; - return 0; -} - -static inline void __zs_cpu_down(struct mapping_area *area) -{ - if (area->vm) - free_vm_area(area->vm); - area->vm = NULL; -} - -static inline void *__zs_map_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); - area->vm_addr = area->vm->addr; - return area->vm_addr + off; -} - -static inline void __zs_unmap_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - unsigned long addr = (unsigned long)area->vm_addr; - - unmap_kernel_range(addr, PAGE_SIZE * 2); -} - -#else /* USE_PGTABLE_MAPPING */ - -static inline int __zs_cpu_up(struct mapping_area *area) -{ - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm_buf) - return 0; - area->vm_buf = (char *)__get_free_page(GFP_KERNEL); - if (!area->vm_buf) - return -ENOMEM; - return 0; -} - -static inline void __zs_cpu_down(struct mapping_area *area) -{ - if (area->vm_buf) - free_page((unsigned long)area->vm_buf); - area->vm_buf = NULL; -} - -static void *__zs_map_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - int sizes[2]; - void *addr; - char *buf = area->vm_buf; - - /* disable page faults to match kmap_atomic() return conditions */ - pagefault_disable(); - - /* no read fastpath */ - if (area->vm_mm == ZS_MM_WO) - goto out; - - sizes[0] = PAGE_SIZE - off; - sizes[1] = size - sizes[0]; - - /* copy object to per-cpu buffer */ - addr = kmap_atomic(pages[0]); - memcpy(buf, addr + off, sizes[0]); - kunmap_atomic(addr); - addr = kmap_atomic(pages[1]); - memcpy(buf + sizes[0], addr, sizes[1]); - kunmap_atomic(addr); -out: - return area->vm_buf; -} - -static void __zs_unmap_object(struct mapping_area *area, - struct page *pages[2], int off, int size) -{ - int sizes[2]; - void *addr; - char *buf = area->vm_buf; - - /* no write fastpath */ - if (area->vm_mm == ZS_MM_RO) - goto out; - - sizes[0] = PAGE_SIZE - off; - sizes[1] = size - sizes[0]; - - /* copy per-cpu buffer to object */ - addr = kmap_atomic(pages[0]); - memcpy(addr + off, buf, sizes[0]); - kunmap_atomic(addr); - addr = kmap_atomic(pages[1]); - memcpy(addr, buf + sizes[0], sizes[1]); - kunmap_atomic(addr); - -out: - /* enable page faults to match kunmap_atomic() return conditions */ - pagefault_enable(); -} - -#endif /* USE_PGTABLE_MAPPING */ - -static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action, - void *pcpu) -{ - int ret, cpu = (long)pcpu; - struct mapping_area *area; - - switch (action) { - case CPU_UP_PREPARE: - area = &per_cpu(zs_map_area, cpu); - ret = __zs_cpu_up(area); - if (ret) - return notifier_from_errno(ret); - break; - case CPU_DEAD: - case CPU_UP_CANCELED: - area = &per_cpu(zs_map_area, cpu); - __zs_cpu_down(area); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block zs_cpu_nb = { - .notifier_call = zs_cpu_notifier -}; - -static void zs_exit(void) -{ - int cpu; - - for_each_online_cpu(cpu) - zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu); - unregister_cpu_notifier(&zs_cpu_nb); -} - -static int zs_init(void) -{ - int cpu, ret; - - register_cpu_notifier(&zs_cpu_nb); - for_each_online_cpu(cpu) { - ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu); - if (notifier_to_errno(ret)) - goto fail; - } - return 0; -fail: - zs_exit(); - return notifier_to_errno(ret); -} - -/** - * zs_create_pool - Creates an allocation pool to work from. - * @flags: allocation flags used to allocate pool metadata - * - * This function must be called before anything when using - * the zsmalloc allocator. - * - * On success, a pointer to the newly created pool is returned, - * otherwise NULL. - */ -struct zs_pool *zs_create_pool(gfp_t flags) -{ - int i, ovhd_size; - struct zs_pool *pool; - - ovhd_size = roundup(sizeof(*pool), PAGE_SIZE); - pool = kzalloc(ovhd_size, GFP_KERNEL); - if (!pool) - return NULL; - - for (i = 0; i < ZS_SIZE_CLASSES; i++) { - int size; - struct size_class *class; - - size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA; - if (size > ZS_MAX_ALLOC_SIZE) - size = ZS_MAX_ALLOC_SIZE; - - class = &pool->size_class[i]; - class->size = size; - class->index = i; - spin_lock_init(&class->lock); - class->pages_per_zspage = get_pages_per_zspage(size); - - } - - pool->flags = flags; - - return pool; -} -EXPORT_SYMBOL_GPL(zs_create_pool); - -void zs_destroy_pool(struct zs_pool *pool) -{ - int i; - - for (i = 0; i < ZS_SIZE_CLASSES; i++) { - int fg; - struct size_class *class = &pool->size_class[i]; - - for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) { - if (class->fullness_list[fg]) { - pr_info("Freeing non-empty class with size " - "%db, fullness group %d\n", - class->size, fg); - } - } - } - kfree(pool); -} -EXPORT_SYMBOL_GPL(zs_destroy_pool); - -/** - * zs_malloc - Allocate block of given size from pool. - * @pool: pool to allocate from - * @size: size of block to allocate - * - * On success, handle to the allocated object is returned, - * otherwise 0. - * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail. - */ -unsigned long zs_malloc(struct zs_pool *pool, size_t size) -{ - unsigned long obj; - struct link_free *link; - int class_idx; - struct size_class *class; - - struct page *first_page, *m_page; - unsigned long m_objidx, m_offset; - - if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE)) - return 0; - - class_idx = get_size_class_index(size); - class = &pool->size_class[class_idx]; - BUG_ON(class_idx != class->index); - - spin_lock(&class->lock); - first_page = find_get_zspage(class); - - if (!first_page) { - spin_unlock(&class->lock); - first_page = alloc_zspage(class, pool->flags); - if (unlikely(!first_page)) - return 0; - - set_zspage_mapping(first_page, class->index, ZS_EMPTY); - spin_lock(&class->lock); - class->pages_allocated += class->pages_per_zspage; - } - - obj = (unsigned long)first_page->freelist; - obj_handle_to_location(obj, &m_page, &m_objidx); - m_offset = obj_idx_to_offset(m_page, m_objidx, class->size); - - link = (struct link_free *)kmap_atomic(m_page) + - m_offset / sizeof(*link); - first_page->freelist = link->next; - memset(link, POISON_INUSE, sizeof(*link)); - kunmap_atomic(link); - - first_page->inuse++; - /* Now move the zspage to another fullness group, if required */ - fix_fullness_group(pool, first_page); - spin_unlock(&class->lock); - - return obj; -} -EXPORT_SYMBOL_GPL(zs_malloc); - -void zs_free(struct zs_pool *pool, unsigned long obj) -{ - struct link_free *link; - struct page *first_page, *f_page; - unsigned long f_objidx, f_offset; - - int class_idx; - struct size_class *class; - enum fullness_group fullness; - - if (unlikely(!obj)) - return; - - obj_handle_to_location(obj, &f_page, &f_objidx); - first_page = get_first_page(f_page); - - get_zspage_mapping(first_page, &class_idx, &fullness); - class = &pool->size_class[class_idx]; - f_offset = obj_idx_to_offset(f_page, f_objidx, class->size); - - spin_lock(&class->lock); - - /* Insert this object in containing zspage's freelist */ - link = (struct link_free *)((unsigned char *)kmap_atomic(f_page) - + f_offset); - link->next = first_page->freelist; - kunmap_atomic(link); - first_page->freelist = (void *)obj; - - first_page->inuse--; - fullness = fix_fullness_group(pool, first_page); - - if (fullness == ZS_EMPTY) - class->pages_allocated -= class->pages_per_zspage; - - spin_unlock(&class->lock); - - if (fullness == ZS_EMPTY) - free_zspage(first_page); -} -EXPORT_SYMBOL_GPL(zs_free); - -/** - * zs_map_object - get address of allocated object from handle. - * @pool: pool from which the object was allocated - * @handle: handle returned from zs_malloc - * - * Before using an object allocated from zs_malloc, it must be mapped using - * this function. When done with the object, it must be unmapped using - * zs_unmap_object. - * - * Only one object can be mapped per cpu at a time. There is no protection - * against nested mappings. - * - * This function returns with preemption and page faults disabled. -*/ -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm) -{ - struct page *page; - unsigned long obj_idx, off; - - unsigned int class_idx; - enum fullness_group fg; - struct size_class *class; - struct mapping_area *area; - struct page *pages[2]; - - BUG_ON(!handle); - - /* - * Because we use per-cpu mapping areas shared among the - * pools/users, we can't allow mapping in interrupt context - * because it can corrupt another users mappings. - */ - BUG_ON(in_interrupt()); - - obj_handle_to_location(handle, &page, &obj_idx); - get_zspage_mapping(get_first_page(page), &class_idx, &fg); - class = &pool->size_class[class_idx]; - off = obj_idx_to_offset(page, obj_idx, class->size); - - area = &get_cpu_var(zs_map_area); - area->vm_mm = mm; - if (off + class->size <= PAGE_SIZE) { - /* this object is contained entirely within a page */ - area->vm_addr = kmap_atomic(page); - return area->vm_addr + off; - } - - /* this object spans two pages */ - pages[0] = page; - pages[1] = get_next_page(page); - BUG_ON(!pages[1]); - - return __zs_map_object(area, pages, off, class->size); -} -EXPORT_SYMBOL_GPL(zs_map_object); - -void zs_unmap_object(struct zs_pool *pool, unsigned long handle) -{ - struct page *page; - unsigned long obj_idx, off; - - unsigned int class_idx; - enum fullness_group fg; - struct size_class *class; - struct mapping_area *area; - - BUG_ON(!handle); - - obj_handle_to_location(handle, &page, &obj_idx); - get_zspage_mapping(get_first_page(page), &class_idx, &fg); - class = &pool->size_class[class_idx]; - off = obj_idx_to_offset(page, obj_idx, class->size); - - area = &__get_cpu_var(zs_map_area); - if (off + class->size <= PAGE_SIZE) - kunmap_atomic(area->vm_addr); - else { - struct page *pages[2]; - - pages[0] = page; - pages[1] = get_next_page(page); - BUG_ON(!pages[1]); - - __zs_unmap_object(area, pages, off, class->size); - } - put_cpu_var(zs_map_area); -} -EXPORT_SYMBOL_GPL(zs_unmap_object); - -u64 zs_get_total_size_bytes(struct zs_pool *pool) -{ - int i; - u64 npages = 0; - - for (i = 0; i < ZS_SIZE_CLASSES; i++) - npages += pool->size_class[i].pages_allocated; - - return npages << PAGE_SHIFT; -} -EXPORT_SYMBOL_GPL(zs_get_total_size_bytes); - -module_init(zs_init); -module_exit(zs_exit); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Nitin Gupta "); diff --git a/drivers/staging/zsmalloc/zsmalloc.h b/drivers/staging/zsmalloc/zsmalloc.h deleted file mode 100644 index 46dbd0558d86..000000000000 --- a/drivers/staging/zsmalloc/zsmalloc.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * zsmalloc memory allocator - * - * Copyright (C) 2011 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the license that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - */ - -#ifndef _ZS_MALLOC_H_ -#define _ZS_MALLOC_H_ - -#include - -/* - * zsmalloc mapping modes - * - * NOTE: These only make a difference when a mapped object spans pages -*/ -enum zs_mapmode { - ZS_MM_RW, /* normal read-write mapping */ - ZS_MM_RO, /* read-only (no copy-out at unmap time) */ - ZS_MM_WO /* write-only (no copy-in at map time) */ -}; - -struct zs_pool; - -struct zs_pool *zs_create_pool(gfp_t flags); -void zs_destroy_pool(struct zs_pool *pool); - -unsigned long zs_malloc(struct zs_pool *pool, size_t size); -void zs_free(struct zs_pool *pool, unsigned long obj); - -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm); -void zs_unmap_object(struct zs_pool *pool, unsigned long handle); - -u64 zs_get_total_size_bytes(struct zs_pool *pool); - -#endif -- cgit v1.2.3 From 68955a0e9bb5f971229d3cabed259b31b39bda89 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:52 -0800 Subject: zram: promote zram from staging Zram has lived in staging for a LONG LONG time and have been fixed/improved by many contributors so code is clean and stable now. Of course, there are lots of product using zram in real practice. The major TV companys have used zram as swap since two years ago and recently our production team released android smart phone with zram which is used as swap, too and recently Android Kitkat start to use zram for small memory smart phone. And there was a report Google released their ChromeOS with zram, too and cyanogenmod have been used zram long time ago. And I heard some disto have used zram block device for tmpfs. In addition, I saw many report from many other peoples. For example, Lubuntu start to use it. The benefit of zram is very clear. With my experience, one of the benefit was to remove jitter of video application with backgroud memory pressure. It would be effect of efficient memory usage by compression but more issue is whether swap is there or not in the system. Recent mobile platforms have used JAVA so there are many anonymous pages. But embedded system normally are reluctant to use eMMC or SDCard as swap because there is wear-leveling and latency issues so if we do not use swap, it means we can't reclaim anoymous pages and at last, we could encounter OOM kill. :( Although we have real storage as swap, it was a problem, too. Because it sometime ends up making system very unresponsible caused by slow swap storage performance. Quote from Luigi on Google "Since Chrome OS was mentioned: the main reason why we don't use swap to a disk (rotating or SSD) is because it doesn't degrade gracefully and leads to a bad interactive experience. Generally we prefer to manage RAM at a higher level, by transparently killing and restarting processes. But we noticed that zram is fast enough to be competitive with the latter, and it lets us make more efficient use of the available RAM. " and he announced. http://www.spinics.net/lists/linux-mm/msg57717.html Other uses case is to use zram for block device. Zram is block device so anyone can format the block device and mount on it so some guys on the internet start zram as /var/tmp. http://forums.gentoo.org/viewtopic-t-838198-start-0.html Let's promote zram and enhance/maintain it instead of removing. Signed-off-by: Minchan Kim Reviewed-by: Konrad Rzeszutek Wilk Acked-by: Nitin Gupta Acked-by: Pekka Enberg Cc: Bob Liu Cc: Greg Kroah-Hartman Cc: Hugh Dickins Cc: Jens Axboe Cc: Luigi Semenzato Cc: Mel Gorman Cc: Rik van Riel Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit cd67e10ac6997c6d1e1504e3c111b693bfdbc148) Signed-off-by: Alex Shi --- drivers/block/Kconfig | 2 + drivers/block/Makefile | 1 + drivers/block/zram/Kconfig | 25 + drivers/block/zram/Makefile | 3 + drivers/block/zram/zram_drv.c | 991 ++++++++++++++++++++++++++++++++++++++++ drivers/block/zram/zram_drv.h | 124 +++++ drivers/staging/Kconfig | 2 - drivers/staging/Makefile | 1 - drivers/staging/zram/Kconfig | 25 - drivers/staging/zram/Makefile | 3 - drivers/staging/zram/zram.txt | 77 ---- drivers/staging/zram/zram_drv.c | 991 ---------------------------------------- drivers/staging/zram/zram_drv.h | 124 ----- 13 files changed, 1146 insertions(+), 1223 deletions(-) create mode 100644 drivers/block/zram/Kconfig create mode 100644 drivers/block/zram/Makefile create mode 100644 drivers/block/zram/zram_drv.c create mode 100644 drivers/block/zram/zram_drv.h delete mode 100644 drivers/staging/zram/Kconfig delete mode 100644 drivers/staging/zram/Makefile delete mode 100644 drivers/staging/zram/zram.txt delete mode 100644 drivers/staging/zram/zram_drv.c delete mode 100644 drivers/staging/zram/zram_drv.h (limited to 'drivers') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index b81ddfea1da0..9da952c9af91 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -105,6 +105,8 @@ source "drivers/block/paride/Kconfig" source "drivers/block/mtip32xx/Kconfig" +source "drivers/block/zram/Kconfig" + config BLK_CPQ_DA tristate "Compaq SMART2 support" depends on PCI && VIRT_TO_BUS diff --git a/drivers/block/Makefile b/drivers/block/Makefile index ca07399a8d99..3675937ab651 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ +obj-$(CONFIG_ZRAM) += zram/ nvme-y := nvme-core.o nvme-scsi.o swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig new file mode 100644 index 000000000000..983314c41349 --- /dev/null +++ b/drivers/block/zram/Kconfig @@ -0,0 +1,25 @@ +config ZRAM + tristate "Compressed RAM block device support" + depends on BLOCK && SYSFS && ZSMALLOC + select LZO_COMPRESS + select LZO_DECOMPRESS + default n + help + Creates virtual block devices called /dev/zramX (X = 0, 1, ...). + Pages written to these disks are compressed and stored in memory + itself. These disks allow very fast I/O and compression provides + good amounts of memory savings. + + It has several use cases, for example: /tmp storage, use as swap + disks and maybe many more. + + See zram.txt for more information. + Project home: + +config ZRAM_DEBUG + bool "Compressed RAM block device debug support" + depends on ZRAM + default n + help + This option adds additional debugging code to the compressed + RAM block device driver. diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile new file mode 100644 index 000000000000..cb0f9ced6a93 --- /dev/null +++ b/drivers/block/zram/Makefile @@ -0,0 +1,3 @@ +zram-y := zram_drv.o + +obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c new file mode 100644 index 000000000000..689ebf105acd --- /dev/null +++ b/drivers/block/zram/zram_drv.c @@ -0,0 +1,991 @@ +/* + * Compressed RAM block device + * + * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * + * This code is released using a dual license strategy: BSD/GPL + * You can choose the licence that better fits your requirements. + * + * Released under the terms of 3-clause BSD License + * Released under the terms of GNU General Public License Version 2.0 + * + * Project home: http://compcache.googlecode.com + */ + +#define KMSG_COMPONENT "zram" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#ifdef CONFIG_ZRAM_DEBUG +#define DEBUG +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "zram_drv.h" + +/* Globals */ +static int zram_major; +static struct zram *zram_devices; + +/* Module params (documentation at end) */ +static unsigned int num_devices = 1; + +static inline struct zram *dev_to_zram(struct device *dev) +{ + return (struct zram *)dev_to_disk(dev)->private_data; +} + +static ssize_t disksize_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", zram->disksize); +} + +static ssize_t initstate_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->init_done); +} + +static ssize_t num_reads_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_reads)); +} + +static ssize_t num_writes_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.num_writes)); +} + +static ssize_t invalid_io_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.invalid_io)); +} + +static ssize_t notify_free_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.notify_free)); +} + +static ssize_t zero_pages_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%u\n", zram->stats.pages_zero); +} + +static ssize_t orig_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)(zram->stats.pages_stored) << PAGE_SHIFT); +} + +static ssize_t compr_data_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + + return sprintf(buf, "%llu\n", + (u64)atomic64_read(&zram->stats.compr_size)); +} + +static ssize_t mem_used_total_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + down_read(&zram->init_lock); + if (zram->init_done) + val = zs_get_total_size_bytes(meta->mem_pool); + up_read(&zram->init_lock); + + return sprintf(buf, "%llu\n", val); +} + +static int zram_test_flag(struct zram_meta *meta, u32 index, + enum zram_pageflags flag) +{ + return meta->table[index].flags & BIT(flag); +} + +static void zram_set_flag(struct zram_meta *meta, u32 index, + enum zram_pageflags flag) +{ + meta->table[index].flags |= BIT(flag); +} + +static void zram_clear_flag(struct zram_meta *meta, u32 index, + enum zram_pageflags flag) +{ + meta->table[index].flags &= ~BIT(flag); +} + +static inline int is_partial_io(struct bio_vec *bvec) +{ + return bvec->bv_len != PAGE_SIZE; +} + +/* + * Check if request is within bounds and aligned on zram logical blocks. + */ +static inline int valid_io_request(struct zram *zram, struct bio *bio) +{ + u64 start, end, bound; + + /* unaligned request */ + if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) + return 0; + if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) + return 0; + + start = bio->bi_sector; + end = start + (bio->bi_size >> SECTOR_SHIFT); + bound = zram->disksize >> SECTOR_SHIFT; + /* out of range range */ + if (unlikely(start >= bound || end >= bound || start > end)) + return 0; + + /* I/O request is valid */ + return 1; +} + +static void zram_meta_free(struct zram_meta *meta) +{ + zs_destroy_pool(meta->mem_pool); + kfree(meta->compress_workmem); + free_pages((unsigned long)meta->compress_buffer, 1); + vfree(meta->table); + kfree(meta); +} + +static struct zram_meta *zram_meta_alloc(u64 disksize) +{ + size_t num_pages; + struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + goto out; + + meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); + if (!meta->compress_workmem) + goto free_meta; + + meta->compress_buffer = + (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!meta->compress_buffer) { + pr_err("Error allocating compressor buffer space\n"); + goto free_workmem; + } + + num_pages = disksize >> PAGE_SHIFT; + meta->table = vzalloc(num_pages * sizeof(*meta->table)); + if (!meta->table) { + pr_err("Error allocating zram address table\n"); + goto free_buffer; + } + + meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); + if (!meta->mem_pool) { + pr_err("Error creating memory pool\n"); + goto free_table; + } + + return meta; + +free_table: + vfree(meta->table); +free_buffer: + free_pages((unsigned long)meta->compress_buffer, 1); +free_workmem: + kfree(meta->compress_workmem); +free_meta: + kfree(meta); + meta = NULL; +out: + return meta; +} + +static void update_position(u32 *index, int *offset, struct bio_vec *bvec) +{ + if (*offset + bvec->bv_len >= PAGE_SIZE) + (*index)++; + *offset = (*offset + bvec->bv_len) % PAGE_SIZE; +} + +static int page_zero_filled(void *ptr) +{ + unsigned int pos; + unsigned long *page; + + page = (unsigned long *)ptr; + + for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { + if (page[pos]) + return 0; + } + + return 1; +} + +static void handle_zero_page(struct bio_vec *bvec) +{ + struct page *page = bvec->bv_page; + void *user_mem; + + user_mem = kmap_atomic(page); + if (is_partial_io(bvec)) + memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); + else + clear_page(user_mem); + kunmap_atomic(user_mem); + + flush_dcache_page(page); +} + +static void zram_free_page(struct zram *zram, size_t index) +{ + struct zram_meta *meta = zram->meta; + unsigned long handle = meta->table[index].handle; + u16 size = meta->table[index].size; + + if (unlikely(!handle)) { + /* + * No memory is allocated for zero filled pages. + * Simply clear zero page flag. + */ + if (zram_test_flag(meta, index, ZRAM_ZERO)) { + zram_clear_flag(meta, index, ZRAM_ZERO); + zram->stats.pages_zero--; + } + return; + } + + if (unlikely(size > max_zpage_size)) + zram->stats.bad_compress--; + + zs_free(meta->mem_pool, handle); + + if (size <= PAGE_SIZE / 2) + zram->stats.good_compress--; + + atomic64_sub(meta->table[index].size, &zram->stats.compr_size); + zram->stats.pages_stored--; + + meta->table[index].handle = 0; + meta->table[index].size = 0; +} + +static int zram_decompress_page(struct zram *zram, char *mem, u32 index) +{ + int ret = LZO_E_OK; + size_t clen = PAGE_SIZE; + unsigned char *cmem; + struct zram_meta *meta = zram->meta; + unsigned long handle = meta->table[index].handle; + + if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { + clear_page(mem); + return 0; + } + + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); + if (meta->table[index].size == PAGE_SIZE) + copy_page(mem, cmem); + else + ret = lzo1x_decompress_safe(cmem, meta->table[index].size, + mem, &clen); + zs_unmap_object(meta->mem_pool, handle); + + /* Should NEVER happen. Return bio error if it does. */ + if (unlikely(ret != LZO_E_OK)) { + pr_err("Decompression failed! err=%d, page=%u\n", ret, index); + atomic64_inc(&zram->stats.failed_reads); + return ret; + } + + return 0; +} + +static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, + u32 index, int offset, struct bio *bio) +{ + int ret; + struct page *page; + unsigned char *user_mem, *uncmem = NULL; + struct zram_meta *meta = zram->meta; + page = bvec->bv_page; + + if (unlikely(!meta->table[index].handle) || + zram_test_flag(meta, index, ZRAM_ZERO)) { + handle_zero_page(bvec); + return 0; + } + + if (is_partial_io(bvec)) + /* Use a temporary buffer to decompress the page */ + uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); + + user_mem = kmap_atomic(page); + if (!is_partial_io(bvec)) + uncmem = user_mem; + + if (!uncmem) { + pr_info("Unable to allocate temp memory\n"); + ret = -ENOMEM; + goto out_cleanup; + } + + ret = zram_decompress_page(zram, uncmem, index); + /* Should NEVER happen. Return bio error if it does. */ + if (unlikely(ret != LZO_E_OK)) + goto out_cleanup; + + if (is_partial_io(bvec)) + memcpy(user_mem + bvec->bv_offset, uncmem + offset, + bvec->bv_len); + + flush_dcache_page(page); + ret = 0; +out_cleanup: + kunmap_atomic(user_mem); + if (is_partial_io(bvec)) + kfree(uncmem); + return ret; +} + +static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, + int offset) +{ + int ret = 0; + size_t clen; + unsigned long handle; + struct page *page; + unsigned char *user_mem, *cmem, *src, *uncmem = NULL; + struct zram_meta *meta = zram->meta; + + page = bvec->bv_page; + src = meta->compress_buffer; + + if (is_partial_io(bvec)) { + /* + * This is a partial IO. We need to read the full page + * before to write the changes. + */ + uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); + if (!uncmem) { + ret = -ENOMEM; + goto out; + } + ret = zram_decompress_page(zram, uncmem, index); + if (ret) + goto out; + } + + user_mem = kmap_atomic(page); + + if (is_partial_io(bvec)) { + memcpy(uncmem + offset, user_mem + bvec->bv_offset, + bvec->bv_len); + kunmap_atomic(user_mem); + user_mem = NULL; + } else { + uncmem = user_mem; + } + + if (page_zero_filled(uncmem)) { + kunmap_atomic(user_mem); + /* Free memory associated with this sector now. */ + zram_free_page(zram, index); + + zram->stats.pages_zero++; + zram_set_flag(meta, index, ZRAM_ZERO); + ret = 0; + goto out; + } + + /* + * zram_slot_free_notify could miss free so that let's + * double check. + */ + if (unlikely(meta->table[index].handle || + zram_test_flag(meta, index, ZRAM_ZERO))) + zram_free_page(zram, index); + + ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, + meta->compress_workmem); + + if (!is_partial_io(bvec)) { + kunmap_atomic(user_mem); + user_mem = NULL; + uncmem = NULL; + } + + if (unlikely(ret != LZO_E_OK)) { + pr_err("Compression failed! err=%d\n", ret); + goto out; + } + + if (unlikely(clen > max_zpage_size)) { + zram->stats.bad_compress++; + clen = PAGE_SIZE; + src = NULL; + if (is_partial_io(bvec)) + src = uncmem; + } + + handle = zs_malloc(meta->mem_pool, clen); + if (!handle) { + pr_info("Error allocating memory for compressed page: %u, size=%zu\n", + index, clen); + ret = -ENOMEM; + goto out; + } + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); + + if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { + src = kmap_atomic(page); + copy_page(cmem, src); + kunmap_atomic(src); + } else { + memcpy(cmem, src, clen); + } + + zs_unmap_object(meta->mem_pool, handle); + + /* + * Free memory associated with this sector + * before overwriting unused sectors. + */ + zram_free_page(zram, index); + + meta->table[index].handle = handle; + meta->table[index].size = clen; + + /* Update stats */ + atomic64_add(clen, &zram->stats.compr_size); + zram->stats.pages_stored++; + if (clen <= PAGE_SIZE / 2) + zram->stats.good_compress++; + +out: + if (is_partial_io(bvec)) + kfree(uncmem); + + if (ret) + atomic64_inc(&zram->stats.failed_writes); + return ret; +} + +static void handle_pending_slot_free(struct zram *zram) +{ + struct zram_slot_free *free_rq; + + spin_lock(&zram->slot_free_lock); + while (zram->slot_free_rq) { + free_rq = zram->slot_free_rq; + zram->slot_free_rq = free_rq->next; + zram_free_page(zram, free_rq->index); + kfree(free_rq); + } + spin_unlock(&zram->slot_free_lock); +} + +static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, + int offset, struct bio *bio, int rw) +{ + int ret; + + if (rw == READ) { + down_read(&zram->lock); + handle_pending_slot_free(zram); + ret = zram_bvec_read(zram, bvec, index, offset, bio); + up_read(&zram->lock); + } else { + down_write(&zram->lock); + handle_pending_slot_free(zram); + ret = zram_bvec_write(zram, bvec, index, offset); + up_write(&zram->lock); + } + + return ret; +} + +static void zram_reset_device(struct zram *zram, bool reset_capacity) +{ + size_t index; + struct zram_meta *meta; + + flush_work(&zram->free_work); + + down_write(&zram->init_lock); + if (!zram->init_done) { + up_write(&zram->init_lock); + return; + } + + meta = zram->meta; + zram->init_done = 0; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { + unsigned long handle = meta->table[index].handle; + if (!handle) + continue; + + zs_free(meta->mem_pool, handle); + } + + zram_meta_free(zram->meta); + zram->meta = NULL; + /* Reset stats */ + memset(&zram->stats, 0, sizeof(zram->stats)); + + zram->disksize = 0; + if (reset_capacity) + set_capacity(zram->disk, 0); + up_write(&zram->init_lock); +} + +static void zram_init_device(struct zram *zram, struct zram_meta *meta) +{ + if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { + pr_info( + "There is little point creating a zram of greater than " + "twice the size of memory since we expect a 2:1 compression " + "ratio. Note that zram uses about 0.1%% of the size of " + "the disk when not in use so a huge zram is " + "wasteful.\n" + "\tMemory Size: %lu kB\n" + "\tSize you selected: %llu kB\n" + "Continuing anyway ...\n", + (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 + ); + } + + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); + + zram->meta = meta; + zram->init_done = 1; + + pr_debug("Initialization done!\n"); +} + +static ssize_t disksize_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 disksize; + struct zram_meta *meta; + struct zram *zram = dev_to_zram(dev); + + disksize = memparse(buf, NULL); + if (!disksize) + return -EINVAL; + + disksize = PAGE_ALIGN(disksize); + meta = zram_meta_alloc(disksize); + down_write(&zram->init_lock); + if (zram->init_done) { + up_write(&zram->init_lock); + zram_meta_free(meta); + pr_info("Cannot change disksize for initialized device\n"); + return -EBUSY; + } + + zram->disksize = disksize; + set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + zram_init_device(zram, meta); + up_write(&zram->init_lock); + + return len; +} + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int ret; + unsigned short do_reset; + struct zram *zram; + struct block_device *bdev; + + zram = dev_to_zram(dev); + bdev = bdget_disk(zram->disk, 0); + + if (!bdev) + return -ENOMEM; + + /* Do not reset an active device! */ + if (bdev->bd_holders) { + ret = -EBUSY; + goto out; + } + + ret = kstrtou16(buf, 10, &do_reset); + if (ret) + goto out; + + if (!do_reset) { + ret = -EINVAL; + goto out; + } + + /* Make sure all pending I/O is finished */ + fsync_bdev(bdev); + bdput(bdev); + + zram_reset_device(zram, true); + return len; + +out: + bdput(bdev); + return ret; +} + +static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) +{ + int i, offset; + u32 index; + struct bio_vec *bvec; + + switch (rw) { + case READ: + atomic64_inc(&zram->stats.num_reads); + break; + case WRITE: + atomic64_inc(&zram->stats.num_writes); + break; + } + + index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; + offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + + bio_for_each_segment(bvec, bio, i) { + int max_transfer_size = PAGE_SIZE - offset; + + if (bvec->bv_len > max_transfer_size) { + /* + * zram_bvec_rw() can only make operation on a single + * zram page. Split the bio vector. + */ + struct bio_vec bv; + + bv.bv_page = bvec->bv_page; + bv.bv_len = max_transfer_size; + bv.bv_offset = bvec->bv_offset; + + if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) + goto out; + + bv.bv_len = bvec->bv_len - max_transfer_size; + bv.bv_offset += max_transfer_size; + if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) + goto out; + } else + if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) + < 0) + goto out; + + update_position(&index, &offset, bvec); + } + + set_bit(BIO_UPTODATE, &bio->bi_flags); + bio_endio(bio, 0); + return; + +out: + bio_io_error(bio); +} + +/* + * Handler function for all zram I/O requests. + */ +static void zram_make_request(struct request_queue *queue, struct bio *bio) +{ + struct zram *zram = queue->queuedata; + + down_read(&zram->init_lock); + if (unlikely(!zram->init_done)) + goto error; + + if (!valid_io_request(zram, bio)) { + atomic64_inc(&zram->stats.invalid_io); + goto error; + } + + __zram_make_request(zram, bio, bio_data_dir(bio)); + up_read(&zram->init_lock); + + return; + +error: + up_read(&zram->init_lock); + bio_io_error(bio); +} + +static void zram_slot_free(struct work_struct *work) +{ + struct zram *zram; + + zram = container_of(work, struct zram, free_work); + down_write(&zram->lock); + handle_pending_slot_free(zram); + up_write(&zram->lock); +} + +static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) +{ + spin_lock(&zram->slot_free_lock); + free_rq->next = zram->slot_free_rq; + zram->slot_free_rq = free_rq; + spin_unlock(&zram->slot_free_lock); +} + +static void zram_slot_free_notify(struct block_device *bdev, + unsigned long index) +{ + struct zram *zram; + struct zram_slot_free *free_rq; + + zram = bdev->bd_disk->private_data; + atomic64_inc(&zram->stats.notify_free); + + free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); + if (!free_rq) + return; + + free_rq->index = index; + add_slot_free(zram, free_rq); + schedule_work(&zram->free_work); +} + +static const struct block_device_operations zram_devops = { + .swap_slot_free_notify = zram_slot_free_notify, + .owner = THIS_MODULE +}; + +static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, + disksize_show, disksize_store); +static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); +static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); +static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); +static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); +static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); +static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); +static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); +static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); +static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); +static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); + +static struct attribute *zram_disk_attrs[] = { + &dev_attr_disksize.attr, + &dev_attr_initstate.attr, + &dev_attr_reset.attr, + &dev_attr_num_reads.attr, + &dev_attr_num_writes.attr, + &dev_attr_invalid_io.attr, + &dev_attr_notify_free.attr, + &dev_attr_zero_pages.attr, + &dev_attr_orig_data_size.attr, + &dev_attr_compr_data_size.attr, + &dev_attr_mem_used_total.attr, + NULL, +}; + +static struct attribute_group zram_disk_attr_group = { + .attrs = zram_disk_attrs, +}; + +static int create_device(struct zram *zram, int device_id) +{ + int ret = -ENOMEM; + + init_rwsem(&zram->lock); + init_rwsem(&zram->init_lock); + + INIT_WORK(&zram->free_work, zram_slot_free); + spin_lock_init(&zram->slot_free_lock); + zram->slot_free_rq = NULL; + + zram->queue = blk_alloc_queue(GFP_KERNEL); + if (!zram->queue) { + pr_err("Error allocating disk queue for device %d\n", + device_id); + goto out; + } + + blk_queue_make_request(zram->queue, zram_make_request); + zram->queue->queuedata = zram; + + /* gendisk structure */ + zram->disk = alloc_disk(1); + if (!zram->disk) { + pr_warn("Error allocating disk structure for device %d\n", + device_id); + goto out_free_queue; + } + + zram->disk->major = zram_major; + zram->disk->first_minor = device_id; + zram->disk->fops = &zram_devops; + zram->disk->queue = zram->queue; + zram->disk->private_data = zram; + snprintf(zram->disk->disk_name, 16, "zram%d", device_id); + + /* Actual capacity set using syfs (/sys/block/zram/disksize */ + set_capacity(zram->disk, 0); + + /* + * To ensure that we always get PAGE_SIZE aligned + * and n*PAGE_SIZED sized I/O requests. + */ + blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); + blk_queue_logical_block_size(zram->disk->queue, + ZRAM_LOGICAL_BLOCK_SIZE); + blk_queue_io_min(zram->disk->queue, PAGE_SIZE); + blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); + + add_disk(zram->disk); + + ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, + &zram_disk_attr_group); + if (ret < 0) { + pr_warn("Error creating sysfs group"); + goto out_free_disk; + } + + zram->init_done = 0; + return 0; + +out_free_disk: + del_gendisk(zram->disk); + put_disk(zram->disk); +out_free_queue: + blk_cleanup_queue(zram->queue); +out: + return ret; +} + +static void destroy_device(struct zram *zram) +{ + sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, + &zram_disk_attr_group); + + del_gendisk(zram->disk); + put_disk(zram->disk); + + blk_cleanup_queue(zram->queue); +} + +static int __init zram_init(void) +{ + int ret, dev_id; + + if (num_devices > max_num_devices) { + pr_warn("Invalid value for num_devices: %u\n", + num_devices); + ret = -EINVAL; + goto out; + } + + zram_major = register_blkdev(0, "zram"); + if (zram_major <= 0) { + pr_warn("Unable to get major number\n"); + ret = -EBUSY; + goto out; + } + + /* Allocate the device array and initialize each one */ + zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL); + if (!zram_devices) { + ret = -ENOMEM; + goto unregister; + } + + for (dev_id = 0; dev_id < num_devices; dev_id++) { + ret = create_device(&zram_devices[dev_id], dev_id); + if (ret) + goto free_devices; + } + + pr_info("Created %u device(s) ...\n", num_devices); + + return 0; + +free_devices: + while (dev_id) + destroy_device(&zram_devices[--dev_id]); + kfree(zram_devices); +unregister: + unregister_blkdev(zram_major, "zram"); +out: + return ret; +} + +static void __exit zram_exit(void) +{ + int i; + struct zram *zram; + + for (i = 0; i < num_devices; i++) { + zram = &zram_devices[i]; + + destroy_device(zram); + /* + * Shouldn't access zram->disk after destroy_device + * because destroy_device already released zram->disk. + */ + zram_reset_device(zram, false); + } + + unregister_blkdev(zram_major, "zram"); + + kfree(zram_devices); + pr_debug("Cleanup done!\n"); +} + +module_init(zram_init); +module_exit(zram_exit); + +module_param(num_devices, uint, 0); +MODULE_PARM_DESC(num_devices, "Number of zram devices"); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Nitin Gupta "); +MODULE_DESCRIPTION("Compressed RAM Block Device"); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h new file mode 100644 index 000000000000..d8f6596513c3 --- /dev/null +++ b/drivers/block/zram/zram_drv.h @@ -0,0 +1,124 @@ +/* + * Compressed RAM block device + * + * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * + * This code is released using a dual license strategy: BSD/GPL + * You can choose the licence that better fits your requirements. + * + * Released under the terms of 3-clause BSD License + * Released under the terms of GNU General Public License Version 2.0 + * + * Project home: http://compcache.googlecode.com + */ + +#ifndef _ZRAM_DRV_H_ +#define _ZRAM_DRV_H_ + +#include +#include +#include + +/* + * Some arbitrary value. This is just to catch + * invalid value for num_devices module parameter. + */ +static const unsigned max_num_devices = 32; + +/*-- Configurable parameters */ + +/* + * Pages that compress to size greater than this are stored + * uncompressed in memory. + */ +static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; + +/* + * NOTE: max_zpage_size must be less than or equal to: + * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would + * always return failure. + */ + +/*-- End of configurable params */ + +#define SECTOR_SHIFT 9 +#define SECTOR_SIZE (1 << SECTOR_SHIFT) +#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) +#define ZRAM_LOGICAL_BLOCK_SHIFT 12 +#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT) +#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ + (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) + +/* Flags for zram pages (table[page_no].flags) */ +enum zram_pageflags { + /* Page consists entirely of zeros */ + ZRAM_ZERO, + + __NR_ZRAM_PAGEFLAGS, +}; + +/*-- Data structures */ + +/* Allocated for each disk page */ +struct table { + unsigned long handle; + u16 size; /* object size (excluding header) */ + u8 count; /* object ref count (not yet used) */ + u8 flags; +} __aligned(4); + +/* + * All 64bit fields should only be manipulated by 64bit atomic accessors. + * All modifications to 32bit counter should be protected by zram->lock. + */ +struct zram_stats { + atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t num_reads; /* failed + successful */ + atomic64_t num_writes; /* --do-- */ + atomic64_t failed_reads; /* should NEVER! happen */ + atomic64_t failed_writes; /* can happen when memory is too low */ + atomic64_t invalid_io; /* non-page-aligned I/O requests */ + atomic64_t notify_free; /* no. of swap slot free notifications */ + u32 pages_zero; /* no. of zero filled pages */ + u32 pages_stored; /* no. of pages currently stored */ + u32 good_compress; /* % of pages with compression ratio<=50% */ + u32 bad_compress; /* % of pages with compression ratio>=75% */ +}; + +struct zram_meta { + void *compress_workmem; + void *compress_buffer; + struct table *table; + struct zs_pool *mem_pool; +}; + +struct zram_slot_free { + unsigned long index; + struct zram_slot_free *next; +}; + +struct zram { + struct zram_meta *meta; + struct rw_semaphore lock; /* protect compression buffers, table, + * 32bit stat counters against concurrent + * notifications, reads and writes */ + + struct work_struct free_work; /* handle pending free request */ + struct zram_slot_free *slot_free_rq; /* list head of free request */ + + struct request_queue *queue; + struct gendisk *disk; + int init_done; + /* Prevent concurrent execution of device init, reset and R/W request */ + struct rw_semaphore init_lock; + /* + * This is the limit on amount of *uncompressed* worth of data + * we can store in a disk. + */ + u64 disksize; /* bytes */ + spinlock_t slot_free_lock; + + struct zram_stats stats; +}; +#endif diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 60585217481f..25c8bffdd248 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -72,8 +72,6 @@ source "drivers/staging/sep/Kconfig" source "drivers/staging/iio/Kconfig" -source "drivers/staging/zram/Kconfig" - source "drivers/staging/wlags49_h2/Kconfig" source "drivers/staging/wlags49_h25/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 29aaeaa283eb..f9d86a4b48e9 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -30,7 +30,6 @@ obj-$(CONFIG_VT6656) += vt6656/ obj-$(CONFIG_VME_BUS) += vme/ obj-$(CONFIG_DX_SEP) += sep/ obj-$(CONFIG_IIO) += iio/ -obj-$(CONFIG_ZRAM) += zram/ obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/ obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/ obj-$(CONFIG_FB_SM7XX) += sm7xxfb/ diff --git a/drivers/staging/zram/Kconfig b/drivers/staging/zram/Kconfig deleted file mode 100644 index 983314c41349..000000000000 --- a/drivers/staging/zram/Kconfig +++ /dev/null @@ -1,25 +0,0 @@ -config ZRAM - tristate "Compressed RAM block device support" - depends on BLOCK && SYSFS && ZSMALLOC - select LZO_COMPRESS - select LZO_DECOMPRESS - default n - help - Creates virtual block devices called /dev/zramX (X = 0, 1, ...). - Pages written to these disks are compressed and stored in memory - itself. These disks allow very fast I/O and compression provides - good amounts of memory savings. - - It has several use cases, for example: /tmp storage, use as swap - disks and maybe many more. - - See zram.txt for more information. - Project home: - -config ZRAM_DEBUG - bool "Compressed RAM block device debug support" - depends on ZRAM - default n - help - This option adds additional debugging code to the compressed - RAM block device driver. diff --git a/drivers/staging/zram/Makefile b/drivers/staging/zram/Makefile deleted file mode 100644 index cb0f9ced6a93..000000000000 --- a/drivers/staging/zram/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -zram-y := zram_drv.o - -obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/staging/zram/zram.txt b/drivers/staging/zram/zram.txt deleted file mode 100644 index 765d790ae831..000000000000 --- a/drivers/staging/zram/zram.txt +++ /dev/null @@ -1,77 +0,0 @@ -zram: Compressed RAM based block devices ----------------------------------------- - -Project home: http://compcache.googlecode.com/ - -* Introduction - -The zram module creates RAM based block devices named /dev/zram -( = 0, 1, ...). Pages written to these disks are compressed and stored -in memory itself. These disks allow very fast I/O and compression provides -good amounts of memory savings. Some of the usecases include /tmp storage, -use as swap disks, various caches under /var and maybe many more :) - -Statistics for individual zram devices are exported through sysfs nodes at -/sys/block/zram/ - -* Usage - -Following shows a typical sequence of steps for using zram. - -1) Load Module: - modprobe zram num_devices=4 - This creates 4 devices: /dev/zram{0,1,2,3} - (num_devices parameter is optional. Default: 1) - -2) Set Disksize - Set disk size by writing the value to sysfs node 'disksize'. - The value can be either in bytes or you can use mem suffixes. - Examples: - # Initialize /dev/zram0 with 50MB disksize - echo $((50*1024*1024)) > /sys/block/zram0/disksize - - # Using mem suffixes - echo 256K > /sys/block/zram0/disksize - echo 512M > /sys/block/zram0/disksize - echo 1G > /sys/block/zram0/disksize - -3) Activate: - mkswap /dev/zram0 - swapon /dev/zram0 - - mkfs.ext4 /dev/zram1 - mount /dev/zram1 /tmp - -4) Stats: - Per-device statistics are exported as various nodes under - /sys/block/zram/ - disksize - num_reads - num_writes - invalid_io - notify_free - discard - zero_pages - orig_data_size - compr_data_size - mem_used_total - -5) Deactivate: - swapoff /dev/zram0 - umount /dev/zram1 - -6) Reset: - Write any positive value to 'reset' sysfs node - echo 1 > /sys/block/zram0/reset - echo 1 > /sys/block/zram1/reset - - This frees all the memory allocated for the given device and - resets the disksize to zero. You must set the disksize again - before reusing the device. - -Please report any problems at: - - Mailing list: linux-mm-cc at laptop dot org - - Issue tracker: http://code.google.com/p/compcache/issues/list - -Nitin Gupta -ngupta@vflare.org diff --git a/drivers/staging/zram/zram_drv.c b/drivers/staging/zram/zram_drv.c deleted file mode 100644 index 689ebf105acd..000000000000 --- a/drivers/staging/zram/zram_drv.c +++ /dev/null @@ -1,991 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com - */ - -#define KMSG_COMPONENT "zram" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#ifdef CONFIG_ZRAM_DEBUG -#define DEBUG -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "zram_drv.h" - -/* Globals */ -static int zram_major; -static struct zram *zram_devices; - -/* Module params (documentation at end) */ -static unsigned int num_devices = 1; - -static inline struct zram *dev_to_zram(struct device *dev) -{ - return (struct zram *)dev_to_disk(dev)->private_data; -} - -static ssize_t disksize_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", zram->disksize); -} - -static ssize_t initstate_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->init_done); -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%u\n", zram->stats.pages_zero); -} - -static ssize_t orig_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); -} - -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_size)); -} - -static ssize_t mem_used_total_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - u64 val = 0; - struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; - - down_read(&zram->init_lock); - if (zram->init_done) - val = zs_get_total_size_bytes(meta->mem_pool); - up_read(&zram->init_lock); - - return sprintf(buf, "%llu\n", val); -} - -static int zram_test_flag(struct zram_meta *meta, u32 index, - enum zram_pageflags flag) -{ - return meta->table[index].flags & BIT(flag); -} - -static void zram_set_flag(struct zram_meta *meta, u32 index, - enum zram_pageflags flag) -{ - meta->table[index].flags |= BIT(flag); -} - -static void zram_clear_flag(struct zram_meta *meta, u32 index, - enum zram_pageflags flag) -{ - meta->table[index].flags &= ~BIT(flag); -} - -static inline int is_partial_io(struct bio_vec *bvec) -{ - return bvec->bv_len != PAGE_SIZE; -} - -/* - * Check if request is within bounds and aligned on zram logical blocks. - */ -static inline int valid_io_request(struct zram *zram, struct bio *bio) -{ - u64 start, end, bound; - - /* unaligned request */ - if (unlikely(bio->bi_sector & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1))) - return 0; - if (unlikely(bio->bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1))) - return 0; - - start = bio->bi_sector; - end = start + (bio->bi_size >> SECTOR_SHIFT); - bound = zram->disksize >> SECTOR_SHIFT; - /* out of range range */ - if (unlikely(start >= bound || end >= bound || start > end)) - return 0; - - /* I/O request is valid */ - return 1; -} - -static void zram_meta_free(struct zram_meta *meta) -{ - zs_destroy_pool(meta->mem_pool); - kfree(meta->compress_workmem); - free_pages((unsigned long)meta->compress_buffer, 1); - vfree(meta->table); - kfree(meta); -} - -static struct zram_meta *zram_meta_alloc(u64 disksize) -{ - size_t num_pages; - struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL); - if (!meta) - goto out; - - meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!meta->compress_workmem) - goto free_meta; - - meta->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!meta->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - goto free_workmem; - } - - num_pages = disksize >> PAGE_SHIFT; - meta->table = vzalloc(num_pages * sizeof(*meta->table)); - if (!meta->table) { - pr_err("Error allocating zram address table\n"); - goto free_buffer; - } - - meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); - if (!meta->mem_pool) { - pr_err("Error creating memory pool\n"); - goto free_table; - } - - return meta; - -free_table: - vfree(meta->table); -free_buffer: - free_pages((unsigned long)meta->compress_buffer, 1); -free_workmem: - kfree(meta->compress_workmem); -free_meta: - kfree(meta); - meta = NULL; -out: - return meta; -} - -static void update_position(u32 *index, int *offset, struct bio_vec *bvec) -{ - if (*offset + bvec->bv_len >= PAGE_SIZE) - (*index)++; - *offset = (*offset + bvec->bv_len) % PAGE_SIZE; -} - -static int page_zero_filled(void *ptr) -{ - unsigned int pos; - unsigned long *page; - - page = (unsigned long *)ptr; - - for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) { - if (page[pos]) - return 0; - } - - return 1; -} - -static void handle_zero_page(struct bio_vec *bvec) -{ - struct page *page = bvec->bv_page; - void *user_mem; - - user_mem = kmap_atomic(page); - if (is_partial_io(bvec)) - memset(user_mem + bvec->bv_offset, 0, bvec->bv_len); - else - clear_page(user_mem); - kunmap_atomic(user_mem); - - flush_dcache_page(page); -} - -static void zram_free_page(struct zram *zram, size_t index) -{ - struct zram_meta *meta = zram->meta; - unsigned long handle = meta->table[index].handle; - u16 size = meta->table[index].size; - - if (unlikely(!handle)) { - /* - * No memory is allocated for zero filled pages. - * Simply clear zero page flag. - */ - if (zram_test_flag(meta, index, ZRAM_ZERO)) { - zram_clear_flag(meta, index, ZRAM_ZERO); - zram->stats.pages_zero--; - } - return; - } - - if (unlikely(size > max_zpage_size)) - zram->stats.bad_compress--; - - zs_free(meta->mem_pool, handle); - - if (size <= PAGE_SIZE / 2) - zram->stats.good_compress--; - - atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - zram->stats.pages_stored--; - - meta->table[index].handle = 0; - meta->table[index].size = 0; -} - -static int zram_decompress_page(struct zram *zram, char *mem, u32 index) -{ - int ret = LZO_E_OK; - size_t clen = PAGE_SIZE; - unsigned char *cmem; - struct zram_meta *meta = zram->meta; - unsigned long handle = meta->table[index].handle; - - if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - clear_page(mem); - return 0; - } - - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); - if (meta->table[index].size == PAGE_SIZE) - copy_page(mem, cmem); - else - ret = lzo1x_decompress_safe(cmem, meta->table[index].size, - mem, &clen); - zs_unmap_object(meta->mem_pool, handle); - - /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { - pr_err("Decompression failed! err=%d, page=%u\n", ret, index); - atomic64_inc(&zram->stats.failed_reads); - return ret; - } - - return 0; -} - -static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset, struct bio *bio) -{ - int ret; - struct page *page; - unsigned char *user_mem, *uncmem = NULL; - struct zram_meta *meta = zram->meta; - page = bvec->bv_page; - - if (unlikely(!meta->table[index].handle) || - zram_test_flag(meta, index, ZRAM_ZERO)) { - handle_zero_page(bvec); - return 0; - } - - if (is_partial_io(bvec)) - /* Use a temporary buffer to decompress the page */ - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); - - user_mem = kmap_atomic(page); - if (!is_partial_io(bvec)) - uncmem = user_mem; - - if (!uncmem) { - pr_info("Unable to allocate temp memory\n"); - ret = -ENOMEM; - goto out_cleanup; - } - - ret = zram_decompress_page(zram, uncmem, index); - /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) - goto out_cleanup; - - if (is_partial_io(bvec)) - memcpy(user_mem + bvec->bv_offset, uncmem + offset, - bvec->bv_len); - - flush_dcache_page(page); - ret = 0; -out_cleanup: - kunmap_atomic(user_mem); - if (is_partial_io(bvec)) - kfree(uncmem); - return ret; -} - -static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset) -{ - int ret = 0; - size_t clen; - unsigned long handle; - struct page *page; - unsigned char *user_mem, *cmem, *src, *uncmem = NULL; - struct zram_meta *meta = zram->meta; - - page = bvec->bv_page; - src = meta->compress_buffer; - - if (is_partial_io(bvec)) { - /* - * This is a partial IO. We need to read the full page - * before to write the changes. - */ - uncmem = kmalloc(PAGE_SIZE, GFP_NOIO); - if (!uncmem) { - ret = -ENOMEM; - goto out; - } - ret = zram_decompress_page(zram, uncmem, index); - if (ret) - goto out; - } - - user_mem = kmap_atomic(page); - - if (is_partial_io(bvec)) { - memcpy(uncmem + offset, user_mem + bvec->bv_offset, - bvec->bv_len); - kunmap_atomic(user_mem); - user_mem = NULL; - } else { - uncmem = user_mem; - } - - if (page_zero_filled(uncmem)) { - kunmap_atomic(user_mem); - /* Free memory associated with this sector now. */ - zram_free_page(zram, index); - - zram->stats.pages_zero++; - zram_set_flag(meta, index, ZRAM_ZERO); - ret = 0; - goto out; - } - - /* - * zram_slot_free_notify could miss free so that let's - * double check. - */ - if (unlikely(meta->table[index].handle || - zram_test_flag(meta, index, ZRAM_ZERO))) - zram_free_page(zram, index); - - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, - meta->compress_workmem); - - if (!is_partial_io(bvec)) { - kunmap_atomic(user_mem); - user_mem = NULL; - uncmem = NULL; - } - - if (unlikely(ret != LZO_E_OK)) { - pr_err("Compression failed! err=%d\n", ret); - goto out; - } - - if (unlikely(clen > max_zpage_size)) { - zram->stats.bad_compress++; - clen = PAGE_SIZE; - src = NULL; - if (is_partial_io(bvec)) - src = uncmem; - } - - handle = zs_malloc(meta->mem_pool, clen); - if (!handle) { - pr_info("Error allocating memory for compressed page: %u, size=%zu\n", - index, clen); - ret = -ENOMEM; - goto out; - } - cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); - - if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { - src = kmap_atomic(page); - copy_page(cmem, src); - kunmap_atomic(src); - } else { - memcpy(cmem, src, clen); - } - - zs_unmap_object(meta->mem_pool, handle); - - /* - * Free memory associated with this sector - * before overwriting unused sectors. - */ - zram_free_page(zram, index); - - meta->table[index].handle = handle; - meta->table[index].size = clen; - - /* Update stats */ - atomic64_add(clen, &zram->stats.compr_size); - zram->stats.pages_stored++; - if (clen <= PAGE_SIZE / 2) - zram->stats.good_compress++; - -out: - if (is_partial_io(bvec)) - kfree(uncmem); - - if (ret) - atomic64_inc(&zram->stats.failed_writes); - return ret; -} - -static void handle_pending_slot_free(struct zram *zram) -{ - struct zram_slot_free *free_rq; - - spin_lock(&zram->slot_free_lock); - while (zram->slot_free_rq) { - free_rq = zram->slot_free_rq; - zram->slot_free_rq = free_rq->next; - zram_free_page(zram, free_rq->index); - kfree(free_rq); - } - spin_unlock(&zram->slot_free_lock); -} - -static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, struct bio *bio, int rw) -{ - int ret; - - if (rw == READ) { - down_read(&zram->lock); - handle_pending_slot_free(zram); - ret = zram_bvec_read(zram, bvec, index, offset, bio); - up_read(&zram->lock); - } else { - down_write(&zram->lock); - handle_pending_slot_free(zram); - ret = zram_bvec_write(zram, bvec, index, offset); - up_write(&zram->lock); - } - - return ret; -} - -static void zram_reset_device(struct zram *zram, bool reset_capacity) -{ - size_t index; - struct zram_meta *meta; - - flush_work(&zram->free_work); - - down_write(&zram->init_lock); - if (!zram->init_done) { - up_write(&zram->init_lock); - return; - } - - meta = zram->meta; - zram->init_done = 0; - - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - unsigned long handle = meta->table[index].handle; - if (!handle) - continue; - - zs_free(meta->mem_pool, handle); - } - - zram_meta_free(zram->meta); - zram->meta = NULL; - /* Reset stats */ - memset(&zram->stats, 0, sizeof(zram->stats)); - - zram->disksize = 0; - if (reset_capacity) - set_capacity(zram->disk, 0); - up_write(&zram->init_lock); -} - -static void zram_init_device(struct zram *zram, struct zram_meta *meta) -{ - if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %lu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 - ); - } - - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - - zram->meta = meta; - zram->init_done = 1; - - pr_debug("Initialization done!\n"); -} - -static ssize_t disksize_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - u64 disksize; - struct zram_meta *meta; - struct zram *zram = dev_to_zram(dev); - - disksize = memparse(buf, NULL); - if (!disksize) - return -EINVAL; - - disksize = PAGE_ALIGN(disksize); - meta = zram_meta_alloc(disksize); - down_write(&zram->init_lock); - if (zram->init_done) { - up_write(&zram->init_lock); - zram_meta_free(meta); - pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; - } - - zram->disksize = disksize; - set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_init_device(zram, meta); - up_write(&zram->init_lock); - - return len; -} - -static ssize_t reset_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - int ret; - unsigned short do_reset; - struct zram *zram; - struct block_device *bdev; - - zram = dev_to_zram(dev); - bdev = bdget_disk(zram->disk, 0); - - if (!bdev) - return -ENOMEM; - - /* Do not reset an active device! */ - if (bdev->bd_holders) { - ret = -EBUSY; - goto out; - } - - ret = kstrtou16(buf, 10, &do_reset); - if (ret) - goto out; - - if (!do_reset) { - ret = -EINVAL; - goto out; - } - - /* Make sure all pending I/O is finished */ - fsync_bdev(bdev); - bdput(bdev); - - zram_reset_device(zram, true); - return len; - -out: - bdput(bdev); - return ret; -} - -static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) -{ - int i, offset; - u32 index; - struct bio_vec *bvec; - - switch (rw) { - case READ: - atomic64_inc(&zram->stats.num_reads); - break; - case WRITE: - atomic64_inc(&zram->stats.num_writes); - break; - } - - index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; - offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; - - bio_for_each_segment(bvec, bio, i) { - int max_transfer_size = PAGE_SIZE - offset; - - if (bvec->bv_len > max_transfer_size) { - /* - * zram_bvec_rw() can only make operation on a single - * zram page. Split the bio vector. - */ - struct bio_vec bv; - - bv.bv_page = bvec->bv_page; - bv.bv_len = max_transfer_size; - bv.bv_offset = bvec->bv_offset; - - if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) - goto out; - - bv.bv_len = bvec->bv_len - max_transfer_size; - bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) - goto out; - } else - if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) - < 0) - goto out; - - update_position(&index, &offset, bvec); - } - - set_bit(BIO_UPTODATE, &bio->bi_flags); - bio_endio(bio, 0); - return; - -out: - bio_io_error(bio); -} - -/* - * Handler function for all zram I/O requests. - */ -static void zram_make_request(struct request_queue *queue, struct bio *bio) -{ - struct zram *zram = queue->queuedata; - - down_read(&zram->init_lock); - if (unlikely(!zram->init_done)) - goto error; - - if (!valid_io_request(zram, bio)) { - atomic64_inc(&zram->stats.invalid_io); - goto error; - } - - __zram_make_request(zram, bio, bio_data_dir(bio)); - up_read(&zram->init_lock); - - return; - -error: - up_read(&zram->init_lock); - bio_io_error(bio); -} - -static void zram_slot_free(struct work_struct *work) -{ - struct zram *zram; - - zram = container_of(work, struct zram, free_work); - down_write(&zram->lock); - handle_pending_slot_free(zram); - up_write(&zram->lock); -} - -static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) -{ - spin_lock(&zram->slot_free_lock); - free_rq->next = zram->slot_free_rq; - zram->slot_free_rq = free_rq; - spin_unlock(&zram->slot_free_lock); -} - -static void zram_slot_free_notify(struct block_device *bdev, - unsigned long index) -{ - struct zram *zram; - struct zram_slot_free *free_rq; - - zram = bdev->bd_disk->private_data; - atomic64_inc(&zram->stats.notify_free); - - free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); - if (!free_rq) - return; - - free_rq->index = index; - add_slot_free(zram, free_rq); - schedule_work(&zram->free_work); -} - -static const struct block_device_operations zram_devops = { - .swap_slot_free_notify = zram_slot_free_notify, - .owner = THIS_MODULE -}; - -static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, - disksize_show, disksize_store); -static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); -static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); -static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); -static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); - -static struct attribute *zram_disk_attrs[] = { - &dev_attr_disksize.attr, - &dev_attr_initstate.attr, - &dev_attr_reset.attr, - &dev_attr_num_reads.attr, - &dev_attr_num_writes.attr, - &dev_attr_invalid_io.attr, - &dev_attr_notify_free.attr, - &dev_attr_zero_pages.attr, - &dev_attr_orig_data_size.attr, - &dev_attr_compr_data_size.attr, - &dev_attr_mem_used_total.attr, - NULL, -}; - -static struct attribute_group zram_disk_attr_group = { - .attrs = zram_disk_attrs, -}; - -static int create_device(struct zram *zram, int device_id) -{ - int ret = -ENOMEM; - - init_rwsem(&zram->lock); - init_rwsem(&zram->init_lock); - - INIT_WORK(&zram->free_work, zram_slot_free); - spin_lock_init(&zram->slot_free_lock); - zram->slot_free_rq = NULL; - - zram->queue = blk_alloc_queue(GFP_KERNEL); - if (!zram->queue) { - pr_err("Error allocating disk queue for device %d\n", - device_id); - goto out; - } - - blk_queue_make_request(zram->queue, zram_make_request); - zram->queue->queuedata = zram; - - /* gendisk structure */ - zram->disk = alloc_disk(1); - if (!zram->disk) { - pr_warn("Error allocating disk structure for device %d\n", - device_id); - goto out_free_queue; - } - - zram->disk->major = zram_major; - zram->disk->first_minor = device_id; - zram->disk->fops = &zram_devops; - zram->disk->queue = zram->queue; - zram->disk->private_data = zram; - snprintf(zram->disk->disk_name, 16, "zram%d", device_id); - - /* Actual capacity set using syfs (/sys/block/zram/disksize */ - set_capacity(zram->disk, 0); - - /* - * To ensure that we always get PAGE_SIZE aligned - * and n*PAGE_SIZED sized I/O requests. - */ - blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE); - blk_queue_logical_block_size(zram->disk->queue, - ZRAM_LOGICAL_BLOCK_SIZE); - blk_queue_io_min(zram->disk->queue, PAGE_SIZE); - blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); - - add_disk(zram->disk); - - ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - if (ret < 0) { - pr_warn("Error creating sysfs group"); - goto out_free_disk; - } - - zram->init_done = 0; - return 0; - -out_free_disk: - del_gendisk(zram->disk); - put_disk(zram->disk); -out_free_queue: - blk_cleanup_queue(zram->queue); -out: - return ret; -} - -static void destroy_device(struct zram *zram) -{ - sysfs_remove_group(&disk_to_dev(zram->disk)->kobj, - &zram_disk_attr_group); - - del_gendisk(zram->disk); - put_disk(zram->disk); - - blk_cleanup_queue(zram->queue); -} - -static int __init zram_init(void) -{ - int ret, dev_id; - - if (num_devices > max_num_devices) { - pr_warn("Invalid value for num_devices: %u\n", - num_devices); - ret = -EINVAL; - goto out; - } - - zram_major = register_blkdev(0, "zram"); - if (zram_major <= 0) { - pr_warn("Unable to get major number\n"); - ret = -EBUSY; - goto out; - } - - /* Allocate the device array and initialize each one */ - zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL); - if (!zram_devices) { - ret = -ENOMEM; - goto unregister; - } - - for (dev_id = 0; dev_id < num_devices; dev_id++) { - ret = create_device(&zram_devices[dev_id], dev_id); - if (ret) - goto free_devices; - } - - pr_info("Created %u device(s) ...\n", num_devices); - - return 0; - -free_devices: - while (dev_id) - destroy_device(&zram_devices[--dev_id]); - kfree(zram_devices); -unregister: - unregister_blkdev(zram_major, "zram"); -out: - return ret; -} - -static void __exit zram_exit(void) -{ - int i; - struct zram *zram; - - for (i = 0; i < num_devices; i++) { - zram = &zram_devices[i]; - - destroy_device(zram); - /* - * Shouldn't access zram->disk after destroy_device - * because destroy_device already released zram->disk. - */ - zram_reset_device(zram, false); - } - - unregister_blkdev(zram_major, "zram"); - - kfree(zram_devices); - pr_debug("Cleanup done!\n"); -} - -module_init(zram_init); -module_exit(zram_exit); - -module_param(num_devices, uint, 0); -MODULE_PARM_DESC(num_devices, "Number of zram devices"); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Nitin Gupta "); -MODULE_DESCRIPTION("Compressed RAM Block Device"); diff --git a/drivers/staging/zram/zram_drv.h b/drivers/staging/zram/zram_drv.h deleted file mode 100644 index d8f6596513c3..000000000000 --- a/drivers/staging/zram/zram_drv.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Compressed RAM block device - * - * Copyright (C) 2008, 2009, 2010 Nitin Gupta - * - * This code is released using a dual license strategy: BSD/GPL - * You can choose the licence that better fits your requirements. - * - * Released under the terms of 3-clause BSD License - * Released under the terms of GNU General Public License Version 2.0 - * - * Project home: http://compcache.googlecode.com - */ - -#ifndef _ZRAM_DRV_H_ -#define _ZRAM_DRV_H_ - -#include -#include -#include - -/* - * Some arbitrary value. This is just to catch - * invalid value for num_devices module parameter. - */ -static const unsigned max_num_devices = 32; - -/*-- Configurable parameters */ - -/* - * Pages that compress to size greater than this are stored - * uncompressed in memory. - */ -static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; - -/* - * NOTE: max_zpage_size must be less than or equal to: - * ZS_MAX_ALLOC_SIZE. Otherwise, zs_malloc() would - * always return failure. - */ - -/*-- End of configurable params */ - -#define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1 << SECTOR_SHIFT) -#define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) -#define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) -#define ZRAM_LOGICAL_BLOCK_SHIFT 12 -#define ZRAM_LOGICAL_BLOCK_SIZE (1 << ZRAM_LOGICAL_BLOCK_SHIFT) -#define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ - (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) - -/* Flags for zram pages (table[page_no].flags) */ -enum zram_pageflags { - /* Page consists entirely of zeros */ - ZRAM_ZERO, - - __NR_ZRAM_PAGEFLAGS, -}; - -/*-- Data structures */ - -/* Allocated for each disk page */ -struct table { - unsigned long handle; - u16 size; /* object size (excluding header) */ - u8 count; /* object ref count (not yet used) */ - u8 flags; -} __aligned(4); - -/* - * All 64bit fields should only be manipulated by 64bit atomic accessors. - * All modifications to 32bit counter should be protected by zram->lock. - */ -struct zram_stats { - atomic64_t compr_size; /* compressed size of pages stored */ - atomic64_t num_reads; /* failed + successful */ - atomic64_t num_writes; /* --do-- */ - atomic64_t failed_reads; /* should NEVER! happen */ - atomic64_t failed_writes; /* can happen when memory is too low */ - atomic64_t invalid_io; /* non-page-aligned I/O requests */ - atomic64_t notify_free; /* no. of swap slot free notifications */ - u32 pages_zero; /* no. of zero filled pages */ - u32 pages_stored; /* no. of pages currently stored */ - u32 good_compress; /* % of pages with compression ratio<=50% */ - u32 bad_compress; /* % of pages with compression ratio>=75% */ -}; - -struct zram_meta { - void *compress_workmem; - void *compress_buffer; - struct table *table; - struct zs_pool *mem_pool; -}; - -struct zram_slot_free { - unsigned long index; - struct zram_slot_free *next; -}; - -struct zram { - struct zram_meta *meta; - struct rw_semaphore lock; /* protect compression buffers, table, - * 32bit stat counters against concurrent - * notifications, reads and writes */ - - struct work_struct free_work; /* handle pending free request */ - struct zram_slot_free *slot_free_rq; /* list head of free request */ - - struct request_queue *queue; - struct gendisk *disk; - int init_done; - /* Prevent concurrent execution of device init, reset and R/W request */ - struct rw_semaphore init_lock; - /* - * This is the limit on amount of *uncompressed* worth of data - * we can store in a disk. - */ - u64 disksize; /* bytes */ - spinlock_t slot_free_lock; - - struct zram_stats stats; -}; -#endif -- cgit v1.2.3 From 15db1d2f8df0721faf81c4e23119a716e537b6cc Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:54 -0800 Subject: zram: remove old private project comment Remove the old private compcache project address so upcoming patches should be sent to LKML because we Linux kernel community will take care. Signed-off-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 49061236a9c2e18b31617cef10d27ba136068bac) Signed-off-by: Alex Shi --- drivers/block/zram/Kconfig | 1 - drivers/block/zram/zram_drv.c | 1 - drivers/block/zram/zram_drv.h | 1 - 3 files changed, 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 983314c41349..3450be850399 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -14,7 +14,6 @@ config ZRAM disks and maybe many more. See zram.txt for more information. - Project home: config ZRAM_DEBUG bool "Compressed RAM block device debug support" diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 689ebf105acd..4c492eade671 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -9,7 +9,6 @@ * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 * - * Project home: http://compcache.googlecode.com */ #define KMSG_COMPONENT "zram" diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index d8f6596513c3..92f70e8f457c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -9,7 +9,6 @@ * Released under the terms of 3-clause BSD License * Released under the terms of GNU General Public License Version 2.0 * - * Project home: http://compcache.googlecode.com */ #ifndef _ZRAM_DRV_H_ -- cgit v1.2.3 From 851a07391ee1cea24cc2a2b099e12f5ce55edca9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:55 -0800 Subject: zram: add copyright Add my copyright to the zram source code which I maintain. Signed-off-by: Minchan Kim Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 7bfb3de8a1b3bebc2dc68d381efe27448c0584c5) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 1 + drivers/block/zram/zram_drv.h | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4c492eade671..d66b404fc535 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -2,6 +2,7 @@ * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 92f70e8f457c..0e46953c08e9 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -2,6 +2,7 @@ * Compressed RAM block device * * Copyright (C) 2008, 2009, 2010 Nitin Gupta + * 2012, 2013 Minchan Kim * * This code is released using a dual license strategy: BSD/GPL * You can choose the licence that better fits your requirements. -- cgit v1.2.3 From fbd4d659587f3be893860c57e7b338fe3f45284d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:45:58 -0800 Subject: zram: fix race between reset and flushing pending work Dan and Sergey reported that there is a racy between reset and flushing of pending work so that it could make oops by freeing zram->meta in reset while zram_slot_free can access zram->meta if new request is adding during the race window. This patch moves flush after taking init_lock so it prevents new request so that it closes the race. Signed-off-by: Minchan Kim Reported-by: Dan Carpenter Cc: Nitin Gupta Cc: Jerome Marchand Tested-by: Sergey Senozhatsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit da4a04126baa3be03bc566d4a2ee0944c5e783d0) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d66b404fc535..1a377872729d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -552,14 +552,14 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) size_t index; struct zram_meta *meta; - flush_work(&zram->free_work); - down_write(&zram->init_lock); if (!zram->init_done) { up_write(&zram->init_lock); return; } + flush_work(&zram->free_work); + meta = zram->meta; zram->init_done = 0; -- cgit v1.2.3 From ee76411779c7fb286de437bd15367ff39ae88e6a Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:00 -0800 Subject: zram: delay pending free request in read path Sergey reported we don't need to handle pending free request every I/O so that this patch removes it in read path while we remain it in write path. Let's consider below example. Swap subsystem ask to zram "A" block free by swap_slot_free_notify but zram had been pended it without real freeing. Swap subsystem allocates "A" block for new data but request pended for a long time just handled and zram blindly free new data on the "A" block. :( That's why we couldn't remove handle pending free request right before zram-write. Signed-off-by: Minchan Kim Reported-by: Sergey Senozhatsky Tested-by: Sergey Senozhatsky Cc: Nitin Gupta Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9b353db16d18f87242337e3e61a948c023505a65) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1a377872729d..8b88a8d064af 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -534,7 +534,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (rw == READ) { down_read(&zram->lock); - handle_pending_slot_free(zram); ret = zram_bvec_read(zram, bvec, index, offset, bio); up_read(&zram->lock); } else { -- cgit v1.2.3 From 6eb4a8a4531c2411703a9c43428ad9de7e762a2f Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:01 -0800 Subject: zram: remove unnecessary free Commit a0c516cbfc74 ("zram: don't grab mutex in zram_slot_free_noity") introduced pending zram slot free in zram's write path in case of missing slot free by memory allocation failure in zram_slot_free_notify but it is not necessary because we have already freed the slot right before overwriting. Signed-off-by: Minchan Kim Cc: Nitin Gupta Cc: Jerome Marchand Tested-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 874e3cddc33f0c0f9cc08ad2b73fa0cbe7dfaa63) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8b88a8d064af..a9c236de4676 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -440,14 +440,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - /* - * zram_slot_free_notify could miss free so that let's - * double check. - */ - if (unlikely(meta->table[index].handle || - zram_test_flag(meta, index, ZRAM_ZERO))) - zram_free_page(zram, index); - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); -- cgit v1.2.3 From 671b5561a6c05a36da64a2b798b1b52dada37c2c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:02 -0800 Subject: zram: use atomic operation for stat Some of fields in zram->stats are protected by zram->lock which is rather coarse-grained so let's use atomic operation without explict locking. This patch is ready for removing dependency of zram->lock in read path which is very coarse-grained rw_semaphore. Of course, this patch adds new atomic operation so it might make slow but my 12CPU test couldn't spot any regression. All gain/lose is marginal within stddev. iozone -t -T -l 12 -u 12 -r 16K -s 60M -I +Z -V 0 ==Initial write ==Initial write records: 50 records: 50 avg: 412875.17 avg: 415638.23 std: 38543.12 (9.34%) std: 36601.11 (8.81%) max: 521262.03 max: 502976.72 min: 343263.13 min: 351389.12 ==Rewrite ==Rewrite records: 50 records: 50 avg: 416640.34 avg: 397914.33 std: 60798.92 (14.59%) std: 46150.42 (11.60%) max: 543057.07 max: 522669.17 min: 304071.67 min: 316588.77 ==Read ==Read records: 50 records: 50 avg: 4147338.63 avg: 4070736.51 std: 179333.25 (4.32%) std: 223499.89 (5.49%) max: 4459295.28 max: 4539514.44 min: 3753057.53 min: 3444686.31 ==Re-read ==Re-read records: 50 records: 50 avg: 4096706.71 avg: 4117218.57 std: 229735.04 (5.61%) std: 171676.25 (4.17%) max: 4430012.09 max: 4459263.94 min: 2987217.80 min: 3666904.28 ==Reverse Read ==Reverse Read records: 50 records: 50 avg: 4062763.83 avg: 4078508.32 std: 186208.46 (4.58%) std: 172684.34 (4.23%) max: 4401358.78 max: 4424757.22 min: 3381625.00 min: 3679359.94 ==Stride read ==Stride read records: 50 records: 50 avg: 4094933.49 avg: 4082170.22 std: 185710.52 (4.54%) std: 196346.68 (4.81%) max: 4478241.25 max: 4460060.97 min: 3732593.23 min: 3584125.78 ==Random read ==Random read records: 50 records: 50 avg: 4031070.04 avg: 4074847.49 std: 192065.51 (4.76%) std: 206911.33 (5.08%) max: 4356931.16 max: 4399442.56 min: 3481619.62 min: 3548372.44 ==Mixed workload ==Mixed workload records: 50 records: 50 avg: 149925.73 avg: 149675.54 std: 7701.26 (5.14%) std: 6902.09 (4.61%) max: 191301.56 max: 175162.05 min: 133566.28 min: 137762.87 ==Random write ==Random write records: 50 records: 50 avg: 404050.11 avg: 393021.47 std: 58887.57 (14.57%) std: 42813.70 (10.89%) max: 601798.09 max: 524533.43 min: 325176.99 min: 313255.34 ==Pwrite ==Pwrite records: 50 records: 50 avg: 411217.70 avg: 411237.96 std: 43114.99 (10.48%) std: 33136.29 (8.06%) max: 530766.79 max: 471899.76 min: 320786.84 min: 317906.94 ==Pread ==Pread records: 50 records: 50 avg: 4154908.65 avg: 4087121.92 std: 151272.08 (3.64%) std: 219505.04 (5.37%) max: 4459478.12 max: 4435857.38 min: 3730512.41 min: 3101101.67 Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit deb0bdeb2f3d6b81d37fc778316dae46b6daab56) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 20 ++++++++++---------- drivers/block/zram/zram_drv.h | 16 ++++++---------- 2 files changed, 16 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a9c236de4676..0b53d1db7eaf 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -104,7 +104,7 @@ static ssize_t zero_pages_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", zram->stats.pages_zero); + return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); } static ssize_t orig_data_size_show(struct device *dev, @@ -113,7 +113,7 @@ static ssize_t orig_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)(zram->stats.pages_stored) << PAGE_SHIFT); + (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } static ssize_t compr_data_size_show(struct device *dev, @@ -292,21 +292,21 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(meta, index, ZRAM_ZERO)) { zram_clear_flag(meta, index, ZRAM_ZERO); - zram->stats.pages_zero--; + atomic_dec(&zram->stats.pages_zero); } return; } if (unlikely(size > max_zpage_size)) - zram->stats.bad_compress--; + atomic_dec(&zram->stats.bad_compress); zs_free(meta->mem_pool, handle); if (size <= PAGE_SIZE / 2) - zram->stats.good_compress--; + atomic_dec(&zram->stats.good_compress); atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - zram->stats.pages_stored--; + atomic_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; meta->table[index].size = 0; @@ -434,7 +434,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, /* Free memory associated with this sector now. */ zram_free_page(zram, index); - zram->stats.pages_zero++; + atomic_inc(&zram->stats.pages_zero); zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; goto out; @@ -455,7 +455,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (unlikely(clen > max_zpage_size)) { - zram->stats.bad_compress++; + atomic_inc(&zram->stats.bad_compress); clen = PAGE_SIZE; src = NULL; if (is_partial_io(bvec)) @@ -492,9 +492,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); - zram->stats.pages_stored++; + atomic_inc(&zram->stats.pages_stored); if (clen <= PAGE_SIZE / 2) - zram->stats.good_compress++; + atomic_inc(&zram->stats.good_compress); out: if (is_partial_io(bvec)) diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 0e46953c08e9..81b0170de369 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -68,10 +68,6 @@ struct table { u8 flags; } __aligned(4); -/* - * All 64bit fields should only be manipulated by 64bit atomic accessors. - * All modifications to 32bit counter should be protected by zram->lock. - */ struct zram_stats { atomic64_t compr_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ @@ -80,10 +76,10 @@ struct zram_stats { atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - u32 pages_zero; /* no. of zero filled pages */ - u32 pages_stored; /* no. of pages currently stored */ - u32 good_compress; /* % of pages with compression ratio<=50% */ - u32 bad_compress; /* % of pages with compression ratio>=75% */ + atomic_t pages_zero; /* no. of zero filled pages */ + atomic_t pages_stored; /* no. of pages currently stored */ + atomic_t good_compress; /* % of pages with compression ratio<=50% */ + atomic_t bad_compress; /* % of pages with compression ratio>=75% */ }; struct zram_meta { @@ -101,8 +97,8 @@ struct zram_slot_free { struct zram { struct zram_meta *meta; struct rw_semaphore lock; /* protect compression buffers, table, - * 32bit stat counters against concurrent - * notifications, reads and writes */ + * reads and writes + */ struct work_struct free_work; /* handle pending free request */ struct zram_slot_free *slot_free_rq; /* list head of free request */ -- cgit v1.2.3 From 76b3c1eb150766da298e3c68074dbd1401f22a7c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:03 -0800 Subject: zram: introduce zram->tb_lock Currently, the zram table is protected by zram->lock but it's rather coarse-grained lock and it makes hard for scalibility. Let's use own rwlock instead of depending on zram->lock. This patch adds new locking so obviously, it would make slow but this patch is just prepartion for removing coarse-grained rw_semaphore(ie, zram->lock) which is hurdle about zram scalability. Final patch in this patchset series will remove the lock from read-path and change rw_semaphore with mutex in write path. With bonus, we could drop pending slot free mess in next patch. Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 92967471b67163bb1654e9b7fe99449ab70a4aaa) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 26 +++++++++++++++++++++----- drivers/block/zram/zram_drv.h | 3 ++- 2 files changed, 23 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 0b53d1db7eaf..42358b61e360 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -140,6 +140,7 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +/* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { @@ -227,6 +228,7 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) goto free_table; } + rwlock_init(&meta->tb_lock); return meta; free_table: @@ -279,6 +281,7 @@ static void handle_zero_page(struct bio_vec *bvec) flush_dcache_page(page); } +/* NOTE: caller should hold meta->tb_lock with write-side */ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -318,20 +321,26 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) size_t clen = PAGE_SIZE; unsigned char *cmem; struct zram_meta *meta = zram->meta; - unsigned long handle = meta->table[index].handle; + unsigned long handle; + u16 size; + + read_lock(&meta->tb_lock); + handle = meta->table[index].handle; + size = meta->table[index].size; if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { + read_unlock(&meta->tb_lock); clear_page(mem); return 0; } cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO); - if (meta->table[index].size == PAGE_SIZE) + if (size == PAGE_SIZE) copy_page(mem, cmem); else - ret = lzo1x_decompress_safe(cmem, meta->table[index].size, - mem, &clen); + ret = lzo1x_decompress_safe(cmem, size, mem, &clen); zs_unmap_object(meta->mem_pool, handle); + read_unlock(&meta->tb_lock); /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret != LZO_E_OK)) { @@ -352,11 +361,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; + read_lock(&meta->tb_lock); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { + read_unlock(&meta->tb_lock); handle_zero_page(bvec); return 0; } + read_unlock(&meta->tb_lock); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -432,10 +444,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ + write_lock(&zram->meta->tb_lock); zram_free_page(zram, index); + zram_set_flag(meta, index, ZRAM_ZERO); + write_unlock(&zram->meta->tb_lock); atomic_inc(&zram->stats.pages_zero); - zram_set_flag(meta, index, ZRAM_ZERO); ret = 0; goto out; } @@ -485,10 +499,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, * Free memory associated with this sector * before overwriting unused sectors. */ + write_lock(&zram->meta->tb_lock); zram_free_page(zram, index); meta->table[index].handle = handle; meta->table[index].size = clen; + write_unlock(&zram->meta->tb_lock); /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 81b0170de369..c3f453f04974 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -83,6 +83,7 @@ struct zram_stats { }; struct zram_meta { + rwlock_t tb_lock; /* protect table */ void *compress_workmem; void *compress_buffer; struct table *table; @@ -96,7 +97,7 @@ struct zram_slot_free { struct zram { struct zram_meta *meta; - struct rw_semaphore lock; /* protect compression buffers, table, + struct rw_semaphore lock; /* protect compression buffers, * reads and writes */ -- cgit v1.2.3 From fa5b73b76279cd7e88743b4b8ec2fa61478d6776 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:04 -0800 Subject: zram: remove workqueue for freeing removed pending slot Commit a0c516cbfc74 ("zram: don't grab mutex in zram_slot_free_noity") introduced free request pending code to avoid scheduling by mutex under spinlock and it was a mess which made code lenghty and increased overhead. Now, we don't need zram->lock any more to free slot so this patch reverts it and then, tb_lock should protect it. Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f614a9f48dedd2b80d1dc8bae8094842fcdb39dd) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 54 +++++-------------------------------------- drivers/block/zram/zram_drv.h | 10 -------- 2 files changed, 6 insertions(+), 58 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 42358b61e360..3e797b844377 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -521,20 +521,6 @@ out: return ret; } -static void handle_pending_slot_free(struct zram *zram) -{ - struct zram_slot_free *free_rq; - - spin_lock(&zram->slot_free_lock); - while (zram->slot_free_rq) { - free_rq = zram->slot_free_rq; - zram->slot_free_rq = free_rq->next; - zram_free_page(zram, free_rq->index); - kfree(free_rq); - } - spin_unlock(&zram->slot_free_lock); -} - static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, int offset, struct bio *bio, int rw) { @@ -546,7 +532,6 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, up_read(&zram->lock); } else { down_write(&zram->lock); - handle_pending_slot_free(zram); ret = zram_bvec_write(zram, bvec, index, offset); up_write(&zram->lock); } @@ -565,8 +550,6 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) return; } - flush_work(&zram->free_work); - meta = zram->meta; zram->init_done = 0; @@ -766,40 +749,19 @@ error: bio_io_error(bio); } -static void zram_slot_free(struct work_struct *work) -{ - struct zram *zram; - - zram = container_of(work, struct zram, free_work); - down_write(&zram->lock); - handle_pending_slot_free(zram); - up_write(&zram->lock); -} - -static void add_slot_free(struct zram *zram, struct zram_slot_free *free_rq) -{ - spin_lock(&zram->slot_free_lock); - free_rq->next = zram->slot_free_rq; - zram->slot_free_rq = free_rq; - spin_unlock(&zram->slot_free_lock); -} - static void zram_slot_free_notify(struct block_device *bdev, unsigned long index) { struct zram *zram; - struct zram_slot_free *free_rq; + struct zram_meta *meta; zram = bdev->bd_disk->private_data; - atomic64_inc(&zram->stats.notify_free); - - free_rq = kmalloc(sizeof(struct zram_slot_free), GFP_ATOMIC); - if (!free_rq) - return; + meta = zram->meta; - free_rq->index = index; - add_slot_free(zram, free_rq); - schedule_work(&zram->free_work); + write_lock(&meta->tb_lock); + zram_free_page(zram, index); + write_unlock(&meta->tb_lock); + atomic64_inc(&zram->stats.notify_free); } static const struct block_device_operations zram_devops = { @@ -846,10 +808,6 @@ static int create_device(struct zram *zram, int device_id) init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); - INIT_WORK(&zram->free_work, zram_slot_free); - spin_lock_init(&zram->slot_free_lock); - zram->slot_free_rq = NULL; - zram->queue = blk_alloc_queue(GFP_KERNEL); if (!zram->queue) { pr_err("Error allocating disk queue for device %d\n", diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c3f453f04974..d876300da6c9 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -90,20 +90,11 @@ struct zram_meta { struct zs_pool *mem_pool; }; -struct zram_slot_free { - unsigned long index; - struct zram_slot_free *next; -}; - struct zram { struct zram_meta *meta; struct rw_semaphore lock; /* protect compression buffers, * reads and writes */ - - struct work_struct free_work; /* handle pending free request */ - struct zram_slot_free *slot_free_rq; /* list head of free request */ - struct request_queue *queue; struct gendisk *disk; int init_done; @@ -114,7 +105,6 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - spinlock_t slot_free_lock; struct zram_stats stats; }; -- cgit v1.2.3 From 3853d83925871721c6ee7b2bc40126183129a3ac Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 30 Jan 2014 15:46:06 -0800 Subject: zram: remove zram->lock in read path and change it with mutex Finally, we separated zram->lock dependency from 32bit stat/ table handling so there is no reason to use rw_semaphore between read and write path so this patch removes the lock from read path totally and changes rw_semaphore with mutex. So, we could do old: read-read: OK read-write: NO write-write: NO Now: read-read: OK read-write: OK write-write: NO The below data proves mixed workload performs well 11 times and there is also enhance on write-write path because current rw-semaphore doesn't support SPIN_ON_OWNER. It's side effect but anyway good thing for us. Write-related tests perform better (from 61% to 1058%) but read path has good/bad(from -2.22% to 1.45%) but they are all marginal within stddev. CPU 12 iozone -t -T -l 12 -u 12 -r 16K -s 60M -I +Z -V 0 ==Initial write ==Initial write records: 10 records: 10 avg: 516189.16 avg: 839907.96 std: 22486.53 (4.36%) std: 47902.17 (5.70%) max: 546970.60 max: 909910.35 min: 481131.54 min: 751148.38 ==Rewrite ==Rewrite records: 10 records: 10 avg: 509527.98 avg: 1050156.37 std: 45799.94 (8.99%) std: 40695.44 (3.88%) max: 611574.27 max: 1111929.26 min: 443679.95 min: 980409.62 ==Read ==Read records: 10 records: 10 avg: 4408624.17 avg: 4472546.76 std: 281152.61 (6.38%) std: 163662.78 (3.66%) max: 4867888.66 max: 4727351.03 min: 4058347.69 min: 4126520.88 ==Re-read ==Re-read records: 10 records: 10 avg: 4462147.53 avg: 4363257.75 std: 283546.11 (6.35%) std: 247292.63 (5.67%) max: 4912894.44 max: 4677241.75 min: 4131386.50 min: 4035235.84 ==Reverse Read ==Reverse Read records: 10 records: 10 avg: 4565865.97 avg: 4485818.08 std: 313395.63 (6.86%) std: 248470.10 (5.54%) max: 5232749.16 max: 4789749.94 min: 4185809.62 min: 3963081.34 ==Stride read ==Stride read records: 10 records: 10 avg: 4515981.80 avg: 4418806.01 std: 211192.32 (4.68%) std: 212837.97 (4.82%) max: 4889287.28 max: 4686967.22 min: 4210362.00 min: 4083041.84 ==Random read ==Random read records: 10 records: 10 avg: 4410525.23 avg: 4387093.18 std: 236693.22 (5.37%) std: 235285.23 (5.36%) max: 4713698.47 max: 4669760.62 min: 4057163.62 min: 3952002.16 ==Mixed workload ==Mixed workload records: 10 records: 10 avg: 243234.25 avg: 2818677.27 std: 28505.07 (11.72%) std: 195569.70 (6.94%) max: 288905.23 max: 3126478.11 min: 212473.16 min: 2484150.69 ==Random write ==Random write records: 10 records: 10 avg: 555887.07 avg: 1053057.79 std: 70841.98 (12.74%) std: 35195.36 (3.34%) max: 683188.28 max: 1096125.73 min: 437299.57 min: 992481.93 ==Pwrite ==Pwrite records: 10 records: 10 avg: 501745.93 avg: 810363.09 std: 16373.54 (3.26%) std: 19245.01 (2.37%) max: 518724.52 max: 833359.70 min: 464208.73 min: 765501.87 ==Pread ==Pread records: 10 records: 10 avg: 4539894.60 avg: 4457680.58 std: 197094.66 (4.34%) std: 188965.60 (4.24%) max: 4877170.38 max: 4689905.53 min: 4226326.03 min: 4095739.72 Signed-off-by: Minchan Kim Cc: Nitin Gupta Tested-by: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e46e33152eb82b8e2db7ffb3790a2a2653c34513) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 17 ++++++++--------- drivers/block/zram/zram_drv.h | 4 +--- 2 files changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3e797b844377..c450dd9390ab 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -229,6 +229,7 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) } rwlock_init(&meta->tb_lock); + mutex_init(&meta->buffer_lock); return meta; free_table: @@ -411,6 +412,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; + bool locked = false; page = bvec->bv_page; src = meta->compress_buffer; @@ -430,6 +432,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } + mutex_lock(&meta->buffer_lock); + locked = true; user_mem = kmap_atomic(page); if (is_partial_io(bvec)) { @@ -456,7 +460,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, meta->compress_workmem); - if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; @@ -513,6 +516,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, atomic_inc(&zram->stats.good_compress); out: + if (locked) + mutex_unlock(&meta->buffer_lock); if (is_partial_io(bvec)) kfree(uncmem); @@ -526,15 +531,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, { int ret; - if (rw == READ) { - down_read(&zram->lock); + if (rw == READ) ret = zram_bvec_read(zram, bvec, index, offset, bio); - up_read(&zram->lock); - } else { - down_write(&zram->lock); + else ret = zram_bvec_write(zram, bvec, index, offset); - up_write(&zram->lock); - } return ret; } @@ -805,7 +805,6 @@ static int create_device(struct zram *zram, int device_id) { int ret = -ENOMEM; - init_rwsem(&zram->lock); init_rwsem(&zram->init_lock); zram->queue = blk_alloc_queue(GFP_KERNEL); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index d876300da6c9..ad8aa35bae00 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -88,13 +88,11 @@ struct zram_meta { void *compress_buffer; struct table *table; struct zs_pool *mem_pool; + struct mutex buffer_lock; /* protect compress buffers */ }; struct zram { struct zram_meta *meta; - struct rw_semaphore lock; /* protect compression buffers, - * reads and writes - */ struct request_queue *queue; struct gendisk *disk; int init_done; -- cgit v1.2.3 From b048aa137fe66b7f19d842a4b145296bbc26caff Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 3 Mar 2014 15:38:34 -0800 Subject: zram: avoid null access when fail to alloc meta zram_meta_alloc could fail so caller should check it. Otherwise, your system will hang. Signed-off-by: Minchan Kim Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit db5d711e2db776f18219b033e5dc4fb7e4264dd7) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c450dd9390ab..cf77a8a1ae97 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -611,6 +611,8 @@ static ssize_t disksize_store(struct device *dev, disksize = PAGE_ALIGN(disksize); meta = zram_meta_alloc(disksize); + if (!meta) + return -ENOMEM; down_write(&zram->init_lock); if (zram->init_done) { up_write(&zram->init_lock); -- cgit v1.2.3 From fafe199bf14378d4a0a309d15bba16ce6742bd19 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:00 -0700 Subject: zram: drop `init_done' struct zram member Introduce init_done() helper function which allows us to drop `init_done' struct zram member. init_done() uses the fact that ->init_done == 1 equals to ->meta != NULL. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit be2d1d56c82d8cf20e6c77515eb499f8e86eb5be) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 21 +++++++++++---------- drivers/block/zram/zram_drv.h | 1 - 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index cf77a8a1ae97..e75ccdf194a6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -42,6 +42,11 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +static inline int init_done(struct zram *zram) +{ + return zram->meta != NULL; +} + static inline struct zram *dev_to_zram(struct device *dev) { return (struct zram *)dev_to_disk(dev)->private_data; @@ -60,7 +65,7 @@ static ssize_t initstate_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", zram->init_done); + return sprintf(buf, "%u\n", init_done(zram)); } static ssize_t num_reads_show(struct device *dev, @@ -133,7 +138,7 @@ static ssize_t mem_used_total_show(struct device *dev, struct zram_meta *meta = zram->meta; down_read(&zram->init_lock); - if (zram->init_done) + if (init_done(zram)) val = zs_get_total_size_bytes(meta->mem_pool); up_read(&zram->init_lock); @@ -545,14 +550,12 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) struct zram_meta *meta; down_write(&zram->init_lock); - if (!zram->init_done) { + if (!init_done(zram)) { up_write(&zram->init_lock); return; } meta = zram->meta; - zram->init_done = 0; - /* Free all pages that are still in this zram device */ for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { unsigned long handle = meta->table[index].handle; @@ -593,8 +596,6 @@ static void zram_init_device(struct zram *zram, struct zram_meta *meta) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); zram->meta = meta; - zram->init_done = 1; - pr_debug("Initialization done!\n"); } @@ -614,7 +615,7 @@ static ssize_t disksize_store(struct device *dev, if (!meta) return -ENOMEM; down_write(&zram->init_lock); - if (zram->init_done) { + if (init_done(zram)) { up_write(&zram->init_lock); zram_meta_free(meta); pr_info("Cannot change disksize for initialized device\n"); @@ -733,7 +734,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) struct zram *zram = queue->queuedata; down_read(&zram->init_lock); - if (unlikely(!zram->init_done)) + if (unlikely(!init_done(zram))) goto error; if (!valid_io_request(zram, bio)) { @@ -856,7 +857,7 @@ static int create_device(struct zram *zram, int device_id) goto out_free_disk; } - zram->init_done = 0; + zram->meta = NULL; return 0; out_free_disk: diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index ad8aa35bae00..e81e9cdf4147 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -95,7 +95,6 @@ struct zram { struct zram_meta *meta; struct request_queue *queue; struct gendisk *disk; - int init_done; /* Prevent concurrent execution of device init, reset and R/W request */ struct rw_semaphore init_lock; /* -- cgit v1.2.3 From 18bebabb7ecc3c03e3339d825141908814a4cf59 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:01 -0700 Subject: zram: do not pass rw argument to __zram_make_request() Do not pass rw argument down the __zram_make_request() -> zram_bvec_rw() chain, decode it in zram_bvec_rw() instead. Besides, this is the place where we distinguish READ and WRITE bio data directions, so account zram RW stats here, instead of __zram_make_request(). This also allows to account a real number of zram READ/WRITE operations, not just requests (single RW request may cause a number of zram RW ops with separate locking, compression/decompression, etc). Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit be257c61306750d11c20d2ac567bf63304c696a3) Signed-off-by: Alex Shi Conflicts: drivers/block/zram/zram_drv.c Conflicts solution: keep bio struct as old before commit 4f024f3797 'block: Abstract out bvec iterator' --- drivers/block/zram/zram_drv.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e75ccdf194a6..3f98cf21c7f0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -532,14 +532,18 @@ out: } static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, struct bio *bio, int rw) + int offset, struct bio *bio) { int ret; + int rw = bio_data_dir(bio); - if (rw == READ) + if (rw == READ) { + atomic64_inc(&zram->stats.num_reads); ret = zram_bvec_read(zram, bvec, index, offset, bio); - else + } else { + atomic64_inc(&zram->stats.num_writes); ret = zram_bvec_write(zram, bvec, index, offset); + } return ret; } @@ -671,20 +675,12 @@ out: return ret; } -static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) +static void __zram_make_request(struct zram *zram, struct bio *bio) { int i, offset; u32 index; struct bio_vec *bvec; - switch (rw) { - case READ: - atomic64_inc(&zram->stats.num_reads); - break; - case WRITE: - atomic64_inc(&zram->stats.num_writes); - break; - } index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; @@ -703,16 +699,15 @@ static void __zram_make_request(struct zram *zram, struct bio *bio, int rw) bv.bv_len = max_transfer_size; bv.bv_offset = bvec->bv_offset; - if (zram_bvec_rw(zram, &bv, index, offset, bio, rw) < 0) + if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0) goto out; bv.bv_len = bvec->bv_len - max_transfer_size; bv.bv_offset += max_transfer_size; - if (zram_bvec_rw(zram, &bv, index+1, 0, bio, rw) < 0) + if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0) goto out; } else - if (zram_bvec_rw(zram, bvec, index, offset, bio, rw) - < 0) + if (zram_bvec_rw(zram, bvec, index, offset, bio) < 0) goto out; update_position(&index, &offset, bvec); @@ -742,7 +737,7 @@ static void zram_make_request(struct request_queue *queue, struct bio *bio) goto error; } - __zram_make_request(zram, bio, bio_data_dir(bio)); + __zram_make_request(zram, bio); up_read(&zram->init_lock); return; -- cgit v1.2.3 From e700d5f0d65058709b17623728a356e2c16a3a3a Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:02 -0700 Subject: zram: remove good and bad compress stats Remove `good' and `bad' compressed sub-requests stats. RW request may cause a number of RW sub-requests. zram used to account `good' compressed sub-queries (with compressed size less than 50% of original size), `bad' compressed sub-queries (with compressed size greater that 75% of original size), leaving sub-requests with compression size between 50% and 75% of original size not accounted and not reported. zram already accounts each sub-request's compression size so we can calculate real device compression ratio. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b7cccf8b4009bf74df61f3c9d86b95fabd807c11) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 11 ----------- drivers/block/zram/zram_drv.h | 2 -- 2 files changed, 13 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3f98cf21c7f0..a09618cc9e68 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -292,7 +292,6 @@ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; unsigned long handle = meta->table[index].handle; - u16 size = meta->table[index].size; if (unlikely(!handle)) { /* @@ -306,14 +305,8 @@ static void zram_free_page(struct zram *zram, size_t index) return; } - if (unlikely(size > max_zpage_size)) - atomic_dec(&zram->stats.bad_compress); - zs_free(meta->mem_pool, handle); - if (size <= PAGE_SIZE / 2) - atomic_dec(&zram->stats.good_compress); - atomic64_sub(meta->table[index].size, &zram->stats.compr_size); atomic_dec(&zram->stats.pages_stored); @@ -477,7 +470,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (unlikely(clen > max_zpage_size)) { - atomic_inc(&zram->stats.bad_compress); clen = PAGE_SIZE; src = NULL; if (is_partial_io(bvec)) @@ -517,9 +509,6 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, /* Update stats */ atomic64_add(clen, &zram->stats.compr_size); atomic_inc(&zram->stats.pages_stored); - if (clen <= PAGE_SIZE / 2) - atomic_inc(&zram->stats.good_compress); - out: if (locked) mutex_unlock(&meta->buffer_lock); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e81e9cdf4147..2f173cb1fd0a 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -78,8 +78,6 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic_t pages_zero; /* no. of zero filled pages */ atomic_t pages_stored; /* no. of pages currently stored */ - atomic_t good_compress; /* % of pages with compression ratio<=50% */ - atomic_t bad_compress; /* % of pages with compression ratio>=75% */ }; struct zram_meta { -- cgit v1.2.3 From 2385e8be54460cae2cf803a482a0b8b76cf8d001 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:03 -0700 Subject: zram: use atomic64_t for all zram stats This is a preparation patch for stats code duplication removal. 1) use atomic64_t for `pages_zero' and `pages_stored' zram stats. 2) `compr_size' and `pages_zero' struct zram_stats members did not follow the existing device attr naming scheme: zram_stats.ATTR has ATTR_show() function. rename them: -- compr_size -> compr_data_size -- pages_zero -> zero_pages Minchan Kim's note: If we really have trouble with atomic stat operation, we could change it with percpu_counter so that it could solve atomic overhead and unnecessary memory space by introducing unsigned long instead of 64bit atomic_t. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 90a7806ea9b9f7cb4751859cc2506e2d80e36ef1) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 18 +++++++++--------- drivers/block/zram/zram_drv.h | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a09618cc9e68..4da4210b3454 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -109,7 +109,7 @@ static ssize_t zero_pages_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero)); + return sprintf(buf, "%llu\n", (u64)atomic64_read(&zram->stats.zero_pages)); } static ssize_t orig_data_size_show(struct device *dev, @@ -118,7 +118,7 @@ static ssize_t orig_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT); + (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } static ssize_t compr_data_size_show(struct device *dev, @@ -127,7 +127,7 @@ static ssize_t compr_data_size_show(struct device *dev, struct zram *zram = dev_to_zram(dev); return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_size)); + (u64)atomic64_read(&zram->stats.compr_data_size)); } static ssize_t mem_used_total_show(struct device *dev, @@ -300,15 +300,15 @@ static void zram_free_page(struct zram *zram, size_t index) */ if (zram_test_flag(meta, index, ZRAM_ZERO)) { zram_clear_flag(meta, index, ZRAM_ZERO); - atomic_dec(&zram->stats.pages_zero); + atomic64_dec(&zram->stats.zero_pages); } return; } zs_free(meta->mem_pool, handle); - atomic64_sub(meta->table[index].size, &zram->stats.compr_size); - atomic_dec(&zram->stats.pages_stored); + atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); + atomic64_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; meta->table[index].size = 0; @@ -451,7 +451,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, zram_set_flag(meta, index, ZRAM_ZERO); write_unlock(&zram->meta->tb_lock); - atomic_inc(&zram->stats.pages_zero); + atomic64_inc(&zram->stats.zero_pages); ret = 0; goto out; } @@ -507,8 +507,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, write_unlock(&zram->meta->tb_lock); /* Update stats */ - atomic64_add(clen, &zram->stats.compr_size); - atomic_inc(&zram->stats.pages_stored); + atomic64_add(clen, &zram->stats.compr_data_size); + atomic64_inc(&zram->stats.pages_stored); out: if (locked) mutex_unlock(&meta->buffer_lock); diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 2f173cb1fd0a..58d4ac537f65 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -69,15 +69,15 @@ struct table { } __aligned(4); struct zram_stats { - atomic64_t compr_size; /* compressed size of pages stored */ + atomic64_t compr_data_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ atomic64_t num_writes; /* --do-- */ atomic64_t failed_reads; /* should NEVER! happen */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ - atomic_t pages_zero; /* no. of zero filled pages */ - atomic_t pages_stored; /* no. of pages currently stored */ + atomic64_t zero_pages; /* no. of zero filled pages */ + atomic64_t pages_stored; /* no. of pages currently stored */ }; struct zram_meta { -- cgit v1.2.3 From e72baa0d78767e5eeaba4f3a3f82be4b584f57fd Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:04 -0700 Subject: zram: remove zram stats code duplication Introduce ZRAM_ATTR_RO macro that generates device_attribute and default ATTR show() function for existing atomic64_t zram stats. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a68eb3b65e658406d386bebef02277f4007b2f45) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 82 ++++++++++++------------------------------- 1 file changed, 23 insertions(+), 59 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4da4210b3454..c4179fb54fde 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -42,6 +42,17 @@ static struct zram *zram_devices; /* Module params (documentation at end) */ static unsigned int num_devices = 1; +#define ZRAM_ATTR_RO(name) \ +static ssize_t zram_attr_##name##_show(struct device *d, \ + struct device_attribute *attr, char *b) \ +{ \ + struct zram *zram = dev_to_zram(d); \ + return sprintf(b, "%llu\n", \ + (u64)atomic64_read(&zram->stats.name)); \ +} \ +static struct device_attribute dev_attr_##name = \ + __ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL); + static inline int init_done(struct zram *zram) { return zram->meta != NULL; @@ -63,53 +74,14 @@ static ssize_t disksize_show(struct device *dev, static ssize_t initstate_show(struct device *dev, struct device_attribute *attr, char *buf) { + u32 val; struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%u\n", init_done(zram)); -} - -static ssize_t num_reads_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_reads)); -} - -static ssize_t num_writes_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.num_writes)); -} - -static ssize_t invalid_io_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.invalid_io)); -} - -static ssize_t notify_free_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.notify_free)); -} - -static ssize_t zero_pages_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); + down_read(&zram->init_lock); + val = init_done(zram); + up_read(&zram->init_lock); - return sprintf(buf, "%llu\n", (u64)atomic64_read(&zram->stats.zero_pages)); + return sprintf(buf, "%u\n", val); } static ssize_t orig_data_size_show(struct device *dev, @@ -121,15 +93,6 @@ static ssize_t orig_data_size_show(struct device *dev, (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } -static ssize_t compr_data_size_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct zram *zram = dev_to_zram(dev); - - return sprintf(buf, "%llu\n", - (u64)atomic64_read(&zram->stats.compr_data_size)); -} - static ssize_t mem_used_total_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -760,15 +723,16 @@ static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR, disksize_show, disksize_store); static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); -static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL); -static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL); -static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL); -static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL); -static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); -static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +ZRAM_ATTR_RO(num_reads); +ZRAM_ATTR_RO(num_writes); +ZRAM_ATTR_RO(invalid_io); +ZRAM_ATTR_RO(notify_free); +ZRAM_ATTR_RO(zero_pages); +ZRAM_ATTR_RO(compr_data_size); + static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, &dev_attr_initstate.attr, -- cgit v1.2.3 From 0a0d055ef99c20fe0e6b1cced9d358ab2e56eafd Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:05 -0700 Subject: zram: report failed read and write stats zram accounted but did not report numbers of failed read and write queries. make these stats available as failed_reads and failed_writes attrs. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 6444724939db5de7390c90f7b4a657159b3b4465) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c4179fb54fde..4043e783e50a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -728,6 +728,8 @@ static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); ZRAM_ATTR_RO(num_reads); ZRAM_ATTR_RO(num_writes); +ZRAM_ATTR_RO(failed_reads); +ZRAM_ATTR_RO(failed_writes); ZRAM_ATTR_RO(invalid_io); ZRAM_ATTR_RO(notify_free); ZRAM_ATTR_RO(zero_pages); @@ -739,6 +741,8 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_reset.attr, &dev_attr_num_reads.attr, &dev_attr_num_writes.attr, + &dev_attr_failed_reads.attr, + &dev_attr_failed_writes.attr, &dev_attr_invalid_io.attr, &dev_attr_notify_free.attr, &dev_attr_zero_pages.attr, -- cgit v1.2.3 From 7d9e350a92b55768e644d5f7b1aacab9acd032f6 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:06 -0700 Subject: zram: drop not used table `count' member struct table `count' member is not used. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 59fc86a4922f1a1c0f69eac758a7e2b2b138aab4) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 58d4ac537f65..1d5b1f5786a8 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -64,7 +64,6 @@ enum zram_pageflags { struct table { unsigned long handle; u16 size; /* object size (excluding header) */ - u8 count; /* object ref count (not yet used) */ u8 flags; } __aligned(4); -- cgit v1.2.3 From 65281bd78e42b4137f9cf87c4b734ff2325bf483 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:07 -0700 Subject: zram: move zram size warning to documentation Move zram warning about disksize and size of memory correlation to zram documentation. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e64cd51d2fa87733176246101df871a8ac5c7c20) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 4043e783e50a..a083dffb699e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -534,23 +534,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) static void zram_init_device(struct zram *zram, struct zram_meta *meta) { - if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) { - pr_info( - "There is little point creating a zram of greater than " - "twice the size of memory since we expect a 2:1 compression " - "ratio. Note that zram uses about 0.1%% of the size of " - "the disk when not in use so a huge zram is " - "wasteful.\n" - "\tMemory Size: %lu kB\n" - "\tSize you selected: %llu kB\n" - "Continuing anyway ...\n", - (totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10 - ); - } - /* zram devices sort of resembles non-rotational disks */ queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - zram->meta = meta; pr_debug("Initialization done!\n"); } -- cgit v1.2.3 From 6ab557fb5b5851a3848743ad6df597323b9d39ee Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:09 -0700 Subject: zram: delete zram_init_device() allocate new `zram_meta' in disksize_store() only for uninitialised zram device, saving a number of allocations and deallocations in case if disksize_store() was called on currently used device. at the same time zram_meta stack variable is not necessary, because we can set ->meta directly. there is also no need in setting QUEUE_FLAG_NONROT queue on every disksize_store(), set it once during device creation. [minchan@kernel.org: handle zram->meta alloc fail case] [minchan@kernel.org: prevent lockdep spew of init_lock] Signed-off-by: Sergey Senozhatsky Signed-off-by: Minchan Kim Acked-by: Jerome Marchand Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b67d1ec189ffb92cdad9b2bd29475fb1e0166983) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a083dffb699e..9c71757aa1c6 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -532,14 +532,6 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) up_write(&zram->init_lock); } -static void zram_init_device(struct zram *zram, struct zram_meta *meta) -{ - /* zram devices sort of resembles non-rotational disks */ - queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); - zram->meta = meta; - pr_debug("Initialization done!\n"); -} - static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -555,17 +547,18 @@ static ssize_t disksize_store(struct device *dev, meta = zram_meta_alloc(disksize); if (!meta) return -ENOMEM; + down_write(&zram->init_lock); if (init_done(zram)) { - up_write(&zram->init_lock); zram_meta_free(meta); + up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); return -EBUSY; } + zram->meta = meta; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - zram_init_device(zram, meta); up_write(&zram->init_lock); return len; @@ -774,7 +767,8 @@ static int create_device(struct zram *zram, int device_id) /* Actual capacity set using syfs (/sys/block/zram/disksize */ set_capacity(zram->disk, 0); - + /* zram devices sort of resembles non-rotational disks */ + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue); /* * To ensure that we always get PAGE_SIZE aligned * and n*PAGE_SIZED sized I/O requests. -- cgit v1.2.3 From 4c48f57e379f75106cde867e4561375ed37ef69f Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:11 -0700 Subject: zram: introduce compressing backend abstraction ZRAM performs direct LZO compression algorithm calls, making it the one and only option. While LZO is generally performs well, LZ4 algorithm tends to have a faster decompression (see http://code.google.com/p/lz4/ for full report) Name Ratio C.speed D.speed MB/s MB/s LZ4 (r101) 2.084 422 1820 LZO 2.06 2.106 414 600 Thus, users who have mostly read (decompress) usage scenarious or mixed workflow (writes with relatively high read ops number) will benefit from using LZ4 compression backend. Introduce compressing backend abstraction zcomp in order to support multiple compression algorithms with the following set of operations: .create .destroy .compress .decompress Schematically zram write() usually contains the following steps: 0) preparation (decompression of partioal IO, etc.) 1) lock buffer_lock mutex (protects meta compress buffers) 2) compress (using meta compress buffers) 3) alloc and map zs_pool object 4) copy compressed data (from meta compress buffers) to object allocated by 3) 5) free previous pool page, assign a new one 6) unlock buffer_lock mutex As we can see, compressing buffers must remain untouched from 1) to 4), because, otherwise, concurrent write() can overwrite data. At the same time, zram_meta must be aware of a) specific compression algorithm memory requirements and b) necessary locking to protect compression buffers. To remove requirement a) new struct zcomp_strm introduced, which contains a compress/decompress `buffer' and compression algorithm `private' part. While struct zcomp implements zcomp_strm stream handling and locking and removes requirement b) from zram meta. zcomp ->create() and ->destroy(), respectively, allocate and deallocate algorithm specific zcomp_strm `private' part. Every zcomp has zcomp stream and mutex to protect its compression stream. Stream usage semantics remains the same -- only one write can hold stream lock and use its buffers. zcomp_strm_find() turns caller into exclusive user of a stream (holding stream mutex until zram release stream), and zcomp_strm_release() makes zcomp stream available (unlock the stream mutex). Hence no concurrent write (compression) operations possible at the moment. iozone -t 3 -R -r 16K -s 60M -I +Z test base patched -------------------------------------------------- Initial write 597992.91 591660.58 Rewrite 609674.34 616054.97 Read 2404771.75 2452909.12 Re-read 2459216.81 2470074.44 Reverse Read 1652769.66 1589128.66 Stride read 2202441.81 2202173.31 Random read 2236311.47 2276565.31 Mixed workload 1423760.41 1709760.06 Random write 579584.08 615933.86 Pwrite 597550.02 594933.70 Pread 1703672.53 1718126.72 Fwrite 1330497.06 1461054.00 Fread 3922851.00 3957242.62 Usage examples: comp = zcomp_create(NAME) /* NAME e.g. "lzo" */ which initialises compressing backend if requested algorithm is supported. Compress: zstrm = zcomp_strm_find(comp) zcomp_compress(comp, zstrm, src, &dst_len) [..] /* copy compressed data */ zcomp_strm_release(comp, zstrm) Decompress: zcomp_decompress(comp, src, src_len, dst); Free compessing backend and its zcomp stream: zcomp_destroy(comp) Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e7e1ef439d18f9a21521116ea9f2b976d7230e54) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 115 +++++++++++++++++++++++++++++++++++++++++ drivers/block/zram/zcomp.h | 58 +++++++++++++++++++++ drivers/block/zram/zcomp_lzo.c | 47 +++++++++++++++++ drivers/block/zram/zcomp_lzo.h | 17 ++++++ 4 files changed, 237 insertions(+) create mode 100644 drivers/block/zram/zcomp.c create mode 100644 drivers/block/zram/zcomp.h create mode 100644 drivers/block/zram/zcomp_lzo.c create mode 100644 drivers/block/zram/zcomp_lzo.h (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c new file mode 100644 index 000000000000..22f4ae235660 --- /dev/null +++ b/drivers/block/zram/zcomp.c @@ -0,0 +1,115 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include + +#include "zcomp.h" +#include "zcomp_lzo.h" + +static struct zcomp_backend *find_backend(const char *compress) +{ + if (strncmp(compress, "lzo", 3) == 0) + return &zcomp_lzo; + return NULL; +} + +static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + if (zstrm->private) + comp->backend->destroy(zstrm->private); + free_pages((unsigned long)zstrm->buffer, 1); + kfree(zstrm); +} + +/* + * allocate new zcomp_strm structure with ->private initialized by + * backend, return NULL on error + */ +static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) +{ + struct zcomp_strm *zstrm = kmalloc(sizeof(*zstrm), GFP_KERNEL); + if (!zstrm) + return NULL; + + zstrm->private = comp->backend->create(); + /* + * allocate 2 pages. 1 for compressed data, plus 1 extra for the + * case when compressed size is larger than the original one + */ + zstrm->buffer = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + if (!zstrm->private || !zstrm->buffer) { + zcomp_strm_free(comp, zstrm); + zstrm = NULL; + } + return zstrm; +} + +struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) +{ + mutex_lock(&comp->strm_lock); + return comp->zstrm; +} + +void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + mutex_unlock(&comp->strm_lock); +} + +int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, + const unsigned char *src, size_t *dst_len) +{ + return comp->backend->compress(src, zstrm->buffer, dst_len, + zstrm->private); +} + +int zcomp_decompress(struct zcomp *comp, const unsigned char *src, + size_t src_len, unsigned char *dst) +{ + return comp->backend->decompress(src, src_len, dst); +} + +void zcomp_destroy(struct zcomp *comp) +{ + zcomp_strm_free(comp, comp->zstrm); + kfree(comp); +} + +/* + * search available compressors for requested algorithm. + * allocate new zcomp and initialize it. return NULL + * if requested algorithm is not supported or in case + * of init error + */ +struct zcomp *zcomp_create(const char *compress) +{ + struct zcomp *comp; + struct zcomp_backend *backend; + + backend = find_backend(compress); + if (!backend) + return NULL; + + comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); + if (!comp) + return NULL; + + comp->backend = backend; + mutex_init(&comp->strm_lock); + + comp->zstrm = zcomp_strm_alloc(comp); + if (!comp->zstrm) { + kfree(comp); + return NULL; + } + return comp; +} diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h new file mode 100644 index 000000000000..c9a98e1317fe --- /dev/null +++ b/drivers/block/zram/zcomp.h @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_H_ +#define _ZCOMP_H_ + +#include + +struct zcomp_strm { + /* compression/decompression buffer */ + void *buffer; + /* + * The private data of the compression stream, only compression + * stream backend can touch this (e.g. compression algorithm + * working memory) + */ + void *private; +}; + +/* static compression backend */ +struct zcomp_backend { + int (*compress)(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private); + + int (*decompress)(const unsigned char *src, size_t src_len, + unsigned char *dst); + + void *(*create)(void); + void (*destroy)(void *private); + + const char *name; +}; + +/* dynamic per-device compression frontend */ +struct zcomp { + struct mutex strm_lock; + struct zcomp_strm *zstrm; + struct zcomp_backend *backend; +}; + +struct zcomp *zcomp_create(const char *comp); +void zcomp_destroy(struct zcomp *comp); + +struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); +void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm); + +int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, + const unsigned char *src, size_t *dst_len); + +int zcomp_decompress(struct zcomp *comp, const unsigned char *src, + size_t src_len, unsigned char *dst); +#endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zcomp_lzo.c b/drivers/block/zram/zcomp_lzo.c new file mode 100644 index 000000000000..da1bc47d588e --- /dev/null +++ b/drivers/block/zram/zcomp_lzo.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "zcomp_lzo.h" + +static void *lzo_create(void) +{ + return kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); +} + +static void lzo_destroy(void *private) +{ + kfree(private); +} + +static int lzo_compress(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private) +{ + int ret = lzo1x_1_compress(src, PAGE_SIZE, dst, dst_len, private); + return ret == LZO_E_OK ? 0 : ret; +} + +static int lzo_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst) +{ + size_t dst_len = PAGE_SIZE; + int ret = lzo1x_decompress_safe(src, src_len, dst, &dst_len); + return ret == LZO_E_OK ? 0 : ret; +} + +struct zcomp_backend zcomp_lzo = { + .compress = lzo_compress, + .decompress = lzo_decompress, + .create = lzo_create, + .destroy = lzo_destroy, + .name = "lzo", +}; diff --git a/drivers/block/zram/zcomp_lzo.h b/drivers/block/zram/zcomp_lzo.h new file mode 100644 index 000000000000..128c5807fa14 --- /dev/null +++ b/drivers/block/zram/zcomp_lzo.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_LZO_H_ +#define _ZCOMP_LZO_H_ + +#include "zcomp.h" + +extern struct zcomp_backend zcomp_lzo; + +#endif /* _ZCOMP_LZO_H_ */ -- cgit v1.2.3 From 92e9d71d940158389d3553a382045c3c9a7a7e98 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:12 -0700 Subject: zram: use zcomp compressing backends Do not perform direct LZO compress/decompress calls, initialise and use zcomp LZO backend (single compression stream) instead. [akpm@linux-foundation.org: resolve conflicts with zram-delete-zram_init_device-fix.patch] Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b7ca232ee7e85ed3b18e39eb20a7f458ee1d6047) Signed-off-by: Alex Shi --- drivers/block/zram/Makefile | 2 +- drivers/block/zram/zram_drv.c | 69 +++++++++++++++++++------------------------ drivers/block/zram/zram_drv.h | 8 ++--- 3 files changed, 36 insertions(+), 43 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index cb0f9ced6a93..757c6a5cadff 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,3 +1,3 @@ -zram-y := zram_drv.o +zram-y := zcomp_lzo.o zcomp.o zram_drv.o obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9c71757aa1c6..9f5d2c2f9ea7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -38,6 +37,7 @@ /* Globals */ static int zram_major; static struct zram *zram_devices; +static const char *default_compressor = "lzo"; /* Module params (documentation at end) */ static unsigned int num_devices = 1; @@ -159,8 +159,6 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio) static void zram_meta_free(struct zram_meta *meta) { zs_destroy_pool(meta->mem_pool); - kfree(meta->compress_workmem); - free_pages((unsigned long)meta->compress_buffer, 1); vfree(meta->table); kfree(meta); } @@ -172,22 +170,11 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) if (!meta) goto out; - meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL); - if (!meta->compress_workmem) - goto free_meta; - - meta->compress_buffer = - (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); - if (!meta->compress_buffer) { - pr_err("Error allocating compressor buffer space\n"); - goto free_workmem; - } - num_pages = disksize >> PAGE_SHIFT; meta->table = vzalloc(num_pages * sizeof(*meta->table)); if (!meta->table) { pr_err("Error allocating zram address table\n"); - goto free_buffer; + goto free_meta; } meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM); @@ -197,15 +184,10 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) } rwlock_init(&meta->tb_lock); - mutex_init(&meta->buffer_lock); return meta; free_table: vfree(meta->table); -free_buffer: - free_pages((unsigned long)meta->compress_buffer, 1); -free_workmem: - kfree(meta->compress_workmem); free_meta: kfree(meta); meta = NULL; @@ -279,8 +261,7 @@ static void zram_free_page(struct zram *zram, size_t index) static int zram_decompress_page(struct zram *zram, char *mem, u32 index) { - int ret = LZO_E_OK; - size_t clen = PAGE_SIZE; + int ret = 0; unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; @@ -300,12 +281,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) if (size == PAGE_SIZE) copy_page(mem, cmem); else - ret = lzo1x_decompress_safe(cmem, size, mem, &clen); + ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); read_unlock(&meta->tb_lock); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); atomic64_inc(&zram->stats.failed_reads); return ret; @@ -348,7 +329,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, ret = zram_decompress_page(zram, uncmem, index); /* Should NEVER happen. Return bio error if it does. */ - if (unlikely(ret != LZO_E_OK)) + if (unlikely(ret)) goto out_cleanup; if (is_partial_io(bvec)) @@ -373,11 +354,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct page *page; unsigned char *user_mem, *cmem, *src, *uncmem = NULL; struct zram_meta *meta = zram->meta; + struct zcomp_strm *zstrm; bool locked = false; page = bvec->bv_page; - src = meta->compress_buffer; - if (is_partial_io(bvec)) { /* * This is a partial IO. We need to read the full page @@ -393,7 +373,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - mutex_lock(&meta->buffer_lock); + zstrm = zcomp_strm_find(zram->comp); locked = true; user_mem = kmap_atomic(page); @@ -419,22 +399,20 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen, - meta->compress_workmem); + ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen); if (!is_partial_io(bvec)) { kunmap_atomic(user_mem); user_mem = NULL; uncmem = NULL; } - if (unlikely(ret != LZO_E_OK)) { + if (unlikely(ret)) { pr_err("Compression failed! err=%d\n", ret); goto out; } - + src = zstrm->buffer; if (unlikely(clen > max_zpage_size)) { clen = PAGE_SIZE; - src = NULL; if (is_partial_io(bvec)) src = uncmem; } @@ -456,6 +434,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, memcpy(cmem, src, clen); } + zcomp_strm_release(zram->comp, zstrm); + locked = false; zs_unmap_object(meta->mem_pool, handle); /* @@ -474,10 +454,9 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, atomic64_inc(&zram->stats.pages_stored); out: if (locked) - mutex_unlock(&meta->buffer_lock); + zcomp_strm_release(zram->comp, zstrm); if (is_partial_io(bvec)) kfree(uncmem); - if (ret) atomic64_inc(&zram->stats.failed_writes); return ret; @@ -521,6 +500,7 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) zs_free(meta->mem_pool, handle); } + zcomp_destroy(zram->comp); zram_meta_free(zram->meta); zram->meta = NULL; /* Reset stats */ @@ -538,6 +518,7 @@ static ssize_t disksize_store(struct device *dev, u64 disksize; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); + int err; disksize = memparse(buf, NULL); if (!disksize) @@ -550,10 +531,17 @@ static ssize_t disksize_store(struct device *dev, down_write(&zram->init_lock); if (init_done(zram)) { - zram_meta_free(meta); - up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); - return -EBUSY; + err = -EBUSY; + goto out_free_meta; + } + + zram->comp = zcomp_create(default_compressor); + if (!zram->comp) { + pr_info("Cannot initialise %s compressing backend\n", + default_compressor); + err = -EINVAL; + goto out_free_meta; } zram->meta = meta; @@ -562,6 +550,11 @@ static ssize_t disksize_store(struct device *dev, up_write(&zram->init_lock); return len; + +out_free_meta: + up_write(&zram->init_lock); + zram_meta_free(meta); + return err; } static ssize_t reset_store(struct device *dev, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 1d5b1f5786a8..45e04f7b713f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -16,9 +16,10 @@ #define _ZRAM_DRV_H_ #include -#include #include +#include "zcomp.h" + /* * Some arbitrary value. This is just to catch * invalid value for num_devices module parameter. @@ -81,17 +82,16 @@ struct zram_stats { struct zram_meta { rwlock_t tb_lock; /* protect table */ - void *compress_workmem; - void *compress_buffer; struct table *table; struct zs_pool *mem_pool; - struct mutex buffer_lock; /* protect compress buffers */ }; struct zram { struct zram_meta *meta; struct request_queue *queue; struct gendisk *disk; + struct zcomp *comp; + /* Prevent concurrent execution of device init, reset and R/W request */ struct rw_semaphore init_lock; /* -- cgit v1.2.3 From 9381faeb8bc1d1fdfb6931b69ee70a594d04c4fe Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:13 -0700 Subject: zram: factor out single stream compression This is preparation patch to add multi stream support to zcomp. Introduce struct zcomp_strm_single and a set of functions to manage zcomp_strm stream access. zcomp_strm_single implements single compession stream, same way as current zcomp implementation. This moves zcomp_strm stream control and locking from zcomp, so compressing backend zcomp is not aware of required locking. Single and multi streams require different locking schemes. Minchan Kim reported that spinlock-based locking scheme (which is used in multi stream implementation) has demonstrated a severe perfomance regression for single compression stream case, comparing to mutex-based. see https://lkml.org/lkml/2014/2/18/16 The following set of functions added: - zcomp_strm_single_find()/zcomp_strm_single_release() find and release a compression stream, implement required locking - zcomp_strm_single_create()/zcomp_strm_single_destroy() create and destroy zcomp_strm_single New ->strm_find() and ->strm_release() callbacks added to zcomp, which are set to zcomp_strm_single_find() and zcomp_strm_single_release() during initialisation. Instead of direct locking and zcomp_strm access from zcomp_strm_find() and zcomp_strm_release(), zcomp now calls ->strm_find() and ->strm_release() correspondingly. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9cc97529a180b369fcb7e5265771b6ba7e01f05b) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 62 ++++++++++++++++++++++++++++++++++++++++------ drivers/block/zram/zcomp.h | 7 ++++-- 2 files changed, 59 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 22f4ae235660..72e8071f9d73 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -16,6 +16,14 @@ #include "zcomp.h" #include "zcomp_lzo.h" +/* + * single zcomp_strm backend + */ +struct zcomp_strm_single { + struct mutex strm_lock; + struct zcomp_strm *zstrm; +}; + static struct zcomp_backend *find_backend(const char *compress) { if (strncmp(compress, "lzo", 3) == 0) @@ -54,15 +62,56 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) return zstrm; } +static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp) +{ + struct zcomp_strm_single *zs = comp->stream; + mutex_lock(&zs->strm_lock); + return zs->zstrm; +} + +static void zcomp_strm_single_release(struct zcomp *comp, + struct zcomp_strm *zstrm) +{ + struct zcomp_strm_single *zs = comp->stream; + mutex_unlock(&zs->strm_lock); +} + +static void zcomp_strm_single_destroy(struct zcomp *comp) +{ + struct zcomp_strm_single *zs = comp->stream; + zcomp_strm_free(comp, zs->zstrm); + kfree(zs); +} + +static int zcomp_strm_single_create(struct zcomp *comp) +{ + struct zcomp_strm_single *zs; + + comp->destroy = zcomp_strm_single_destroy; + comp->strm_find = zcomp_strm_single_find; + comp->strm_release = zcomp_strm_single_release; + zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL); + if (!zs) + return -ENOMEM; + + comp->stream = zs; + mutex_init(&zs->strm_lock); + zs->zstrm = zcomp_strm_alloc(comp); + if (!zs->zstrm) { + kfree(zs); + return -ENOMEM; + } + return 0; +} + struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) { - mutex_lock(&comp->strm_lock); - return comp->zstrm; + return comp->strm_find(comp); } void zcomp_strm_release(struct zcomp *comp, struct zcomp_strm *zstrm) { - mutex_unlock(&comp->strm_lock); + comp->strm_release(comp, zstrm); } int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, @@ -80,7 +129,7 @@ int zcomp_decompress(struct zcomp *comp, const unsigned char *src, void zcomp_destroy(struct zcomp *comp) { - zcomp_strm_free(comp, comp->zstrm); + comp->destroy(comp); kfree(comp); } @@ -104,10 +153,7 @@ struct zcomp *zcomp_create(const char *compress) return NULL; comp->backend = backend; - mutex_init(&comp->strm_lock); - - comp->zstrm = zcomp_strm_alloc(comp); - if (!comp->zstrm) { + if (zcomp_strm_single_create(comp) != 0) { kfree(comp); return NULL; } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index c9a98e1317fe..dc3500d842a3 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -39,9 +39,12 @@ struct zcomp_backend { /* dynamic per-device compression frontend */ struct zcomp { - struct mutex strm_lock; - struct zcomp_strm *zstrm; + void *stream; struct zcomp_backend *backend; + + struct zcomp_strm *(*strm_find)(struct zcomp *comp); + void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); + void (*destroy)(struct zcomp *comp); }; struct zcomp *zcomp_create(const char *comp); -- cgit v1.2.3 From de980e9147bd30182894d0bb6778fbd0c8a7f293 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:14 -0700 Subject: zram: add multi stream functionality Existing zram (zcomp) implementation has only one compression stream (buffer and algorithm private part), so in order to prevent data corruption only one write (compress operation) can use this compression stream, forcing all concurrent write operations to wait for stream lock to be released. This patch changes zcomp to keep a compression streams list of user-defined size (via sysfs device attr). Each write operation still exclusively holds compression stream, the difference is that we can have N write operations (depending on size of streams list) executing in parallel. See TEST section later in commit message for performance data. Introduce struct zcomp_strm_multi and a set of functions to manage zcomp_strm stream access. zcomp_strm_multi has a list of idle zcomp_strm structs, spinlock to protect idle list and wait queue, making it possible to perform parallel compressions. The following set of functions added: - zcomp_strm_multi_find()/zcomp_strm_multi_release() find and release a compression stream, implement required locking - zcomp_strm_multi_create()/zcomp_strm_multi_destroy() create and destroy zcomp_strm_multi zcomp ->strm_find() and ->strm_release() callbacks are set during initialisation to zcomp_strm_multi_find()/zcomp_strm_multi_release() correspondingly. Each time zcomp issues a zcomp_strm_multi_find() call, the following set of operations performed: - spin lock strm_lock - if idle list is not empty, remove zcomp_strm from idle list, spin unlock and return zcomp stream pointer to caller - if idle list is empty, current adds itself to wait queue. it will be awaken by zcomp_strm_multi_release() caller. zcomp_strm_multi_release(): - spin lock strm_lock - add zcomp stream to idle list - spin unlock, wake up sleeper Minchan Kim reported that spinlock-based locking scheme has demonstrated a severe perfomance regression for single compression stream case, comparing to mutex-based (see https://lkml.org/lkml/2014/2/18/16) base spinlock mutex ==Initial write ==Initial write ==Initial write records: 5 records: 5 records: 5 avg: 1642424.35 avg: 699610.40 avg: 1655583.71 std: 39890.95(2.43%) std: 232014.19(33.16%) std: 52293.96 max: 1690170.94 max: 1163473.45 max: 1697164.75 min: 1568669.52 min: 573429.88 min: 1553410.23 ==Rewrite ==Rewrite ==Rewrite records: 5 records: 5 records: 5 avg: 1611775.39 avg: 501406.64 avg: 1684419.11 std: 17144.58(1.06%) std: 15354.41(3.06%) std: 18367.42 max: 1641800.95 max: 531356.78 max: 1706445.84 min: 1593515.27 min: 488817.78 min: 1655335.73 When only one compression stream available, mutex with spin on owner tends to perform much better than frequent wait_event()/wake_up(). This is why single stream implemented as a special case with mutex locking. Introduce and document zram device attribute max_comp_streams. This attr shows and stores current zcomp's max number of zcomp streams (max_strm). Extend zcomp's zcomp_create() with `max_strm' parameter. `max_strm' limits the number of zcomp_strm structs in compression backend's idle list (max_comp_streams). max_comp_streams used during initialisation as follows: -- passing to zcomp_create() max_strm equals to 1 will initialise zcomp using single compression stream zcomp_strm_single (mutex-based locking). -- passing to zcomp_create() max_strm greater than 1 will initialise zcomp using multi compression stream zcomp_strm_multi (spinlock-based locking). default max_comp_streams value is 1, meaning that zram with single stream will be initialised. Later patch will introduce configuration knob to change max_comp_streams on already initialised and used zcomp. TEST iozone -t 3 -R -r 16K -s 60M -I +Z test base 1 strm (mutex) 3 strm (spinlock) ----------------------------------------------------------------------- Initial write 589286.78 583518.39 718011.05 Rewrite 604837.97 596776.38 1515125.72 Random write 584120.11 595714.58 1388850.25 Pwrite 535731.17 541117.38 739295.27 Fwrite 1418083.88 1478612.72 1484927.06 Usage example: set max_comp_streams to 4 echo 4 > /sys/block/zram0/max_comp_streams show current max_comp_streams (default value is 1). cat /sys/block/zram0/max_comp_streams Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit beca3ec71fe5490ee9237dc42400f50402baf83e) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 124 +++++++++++++++++++++++++++++++++++++++++- drivers/block/zram/zcomp.h | 4 +- drivers/block/zram/zram_drv.c | 42 +++++++++++++- drivers/block/zram/zram_drv.h | 2 +- 4 files changed, 167 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 72e8071f9d73..c06f75f54718 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -24,6 +24,21 @@ struct zcomp_strm_single { struct zcomp_strm *zstrm; }; +/* + * multi zcomp_strm backend + */ +struct zcomp_strm_multi { + /* protect strm list */ + spinlock_t strm_lock; + /* max possible number of zstrm streams */ + int max_strm; + /* number of available zstrm streams */ + int avail_strm; + /* list of available strms */ + struct list_head idle_strm; + wait_queue_head_t strm_wait; +}; + static struct zcomp_backend *find_backend(const char *compress) { if (strncmp(compress, "lzo", 3) == 0) @@ -62,6 +77,107 @@ static struct zcomp_strm *zcomp_strm_alloc(struct zcomp *comp) return zstrm; } +/* + * get idle zcomp_strm or wait until other process release + * (zcomp_strm_release()) one for us + */ +static struct zcomp_strm *zcomp_strm_multi_find(struct zcomp *comp) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + while (1) { + spin_lock(&zs->strm_lock); + if (!list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + spin_unlock(&zs->strm_lock); + return zstrm; + } + /* zstrm streams limit reached, wait for idle stream */ + if (zs->avail_strm >= zs->max_strm) { + spin_unlock(&zs->strm_lock); + wait_event(zs->strm_wait, !list_empty(&zs->idle_strm)); + continue; + } + /* allocate new zstrm stream */ + zs->avail_strm++; + spin_unlock(&zs->strm_lock); + + zstrm = zcomp_strm_alloc(comp); + if (!zstrm) { + spin_lock(&zs->strm_lock); + zs->avail_strm--; + spin_unlock(&zs->strm_lock); + wait_event(zs->strm_wait, !list_empty(&zs->idle_strm)); + continue; + } + break; + } + return zstrm; +} + +/* add stream back to idle list and wake up waiter or free the stream */ +static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstrm) +{ + struct zcomp_strm_multi *zs = comp->stream; + + spin_lock(&zs->strm_lock); + if (zs->avail_strm <= zs->max_strm) { + list_add(&zstrm->list, &zs->idle_strm); + spin_unlock(&zs->strm_lock); + wake_up(&zs->strm_wait); + return; + } + + zs->avail_strm--; + spin_unlock(&zs->strm_lock); + zcomp_strm_free(comp, zstrm); +} + +static void zcomp_strm_multi_destroy(struct zcomp *comp) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + while (!list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + zcomp_strm_free(comp, zstrm); + } + kfree(zs); +} + +static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm) +{ + struct zcomp_strm *zstrm; + struct zcomp_strm_multi *zs; + + comp->destroy = zcomp_strm_multi_destroy; + comp->strm_find = zcomp_strm_multi_find; + comp->strm_release = zcomp_strm_multi_release; + zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL); + if (!zs) + return -ENOMEM; + + comp->stream = zs; + spin_lock_init(&zs->strm_lock); + INIT_LIST_HEAD(&zs->idle_strm); + init_waitqueue_head(&zs->strm_wait); + zs->max_strm = max_strm; + zs->avail_strm = 1; + + zstrm = zcomp_strm_alloc(comp); + if (!zstrm) { + kfree(zs); + return -ENOMEM; + } + list_add(&zstrm->list, &zs->idle_strm); + return 0; +} + static struct zcomp_strm *zcomp_strm_single_find(struct zcomp *comp) { struct zcomp_strm_single *zs = comp->stream; @@ -139,7 +255,7 @@ void zcomp_destroy(struct zcomp *comp) * if requested algorithm is not supported or in case * of init error */ -struct zcomp *zcomp_create(const char *compress) +struct zcomp *zcomp_create(const char *compress, int max_strm) { struct zcomp *comp; struct zcomp_backend *backend; @@ -153,7 +269,11 @@ struct zcomp *zcomp_create(const char *compress) return NULL; comp->backend = backend; - if (zcomp_strm_single_create(comp) != 0) { + if (max_strm > 1) + zcomp_strm_multi_create(comp, max_strm); + else + zcomp_strm_single_create(comp); + if (!comp->stream) { kfree(comp); return NULL; } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index dc3500d842a3..2a3684446160 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -21,6 +21,8 @@ struct zcomp_strm { * working memory) */ void *private; + /* used in multi stream backend, protected by backend strm_lock */ + struct list_head list; }; /* static compression backend */ @@ -47,7 +49,7 @@ struct zcomp { void (*destroy)(struct zcomp *comp); }; -struct zcomp *zcomp_create(const char *comp); +struct zcomp *zcomp_create(const char *comp, int max_strm); void zcomp_destroy(struct zcomp *comp); struct zcomp_strm *zcomp_strm_find(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 9f5d2c2f9ea7..88ee317ce6d2 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -108,6 +108,40 @@ static ssize_t mem_used_total_show(struct device *dev, return sprintf(buf, "%llu\n", val); } +static ssize_t max_comp_streams_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + int val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->max_comp_streams; + up_read(&zram->init_lock); + + return sprintf(buf, "%d\n", val); +} + +static ssize_t max_comp_streams_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int num; + struct zram *zram = dev_to_zram(dev); + + if (kstrtoint(buf, 0, &num)) + return -EINVAL; + if (num < 1) + return -EINVAL; + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Can't set max_comp_streams for initialized device\n"); + return -EBUSY; + } + zram->max_comp_streams = num; + up_write(&zram->init_lock); + return len; +} + /* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) @@ -501,6 +535,8 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) } zcomp_destroy(zram->comp); + zram->max_comp_streams = 1; + zram_meta_free(zram->meta); zram->meta = NULL; /* Reset stats */ @@ -536,7 +572,7 @@ static ssize_t disksize_store(struct device *dev, goto out_free_meta; } - zram->comp = zcomp_create(default_compressor); + zram->comp = zcomp_create(default_compressor, zram->max_comp_streams); if (!zram->comp) { pr_info("Cannot initialise %s compressing backend\n", default_compressor); @@ -696,6 +732,8 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, + max_comp_streams_show, max_comp_streams_store); ZRAM_ATTR_RO(num_reads); ZRAM_ATTR_RO(num_writes); @@ -720,6 +758,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_orig_data_size.attr, &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, + &dev_attr_max_comp_streams.attr, NULL, }; @@ -782,6 +821,7 @@ static int create_device(struct zram *zram, int device_id) } zram->meta = NULL; + zram->max_comp_streams = 1; return 0; out_free_disk: diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 45e04f7b713f..ccf36d11755a 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -99,7 +99,7 @@ struct zram { * we can store in a disk. */ u64 disksize; /* bytes */ - + int max_comp_streams; struct zram_stats stats; }; #endif -- cgit v1.2.3 From 9634c9a147869de6d78f53a49823980aaea7233c Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:15 -0700 Subject: zram: add set_max_streams knob This patch allows to change max_comp_streams on initialised zcomp. Introduce zcomp set_max_streams() knob, zcomp_strm_multi_set_max_streams() and zcomp_strm_single_set_max_streams() callbacks to change streams limit for zcomp_strm_multi and zcomp_strm_single, accordingly. set_max_streams for single steam zcomp does nothing. If user has lowered the limit, then zcomp_strm_multi_set_max_streams() attempts to immediately free extra streams (as much as it can, depending on idle streams availability). Note, this patch does not allow to change stream 'policy' from single to multi stream (or vice versa) on already initialised compression backend. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit fe8eb122c82b2049c460fc6df6e8583a2f935cff) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 36 ++++++++++++++++++++++++++++++++++++ drivers/block/zram/zcomp.h | 3 +++ drivers/block/zram/zram_drv.c | 5 ++--- 3 files changed, 41 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index c06f75f54718..ac276f79f21c 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -136,6 +136,29 @@ static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstr zcomp_strm_free(comp, zstrm); } +/* change max_strm limit */ +static int zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) +{ + struct zcomp_strm_multi *zs = comp->stream; + struct zcomp_strm *zstrm; + + spin_lock(&zs->strm_lock); + zs->max_strm = num_strm; + /* + * if user has lowered the limit and there are idle streams, + * immediately free as much streams (and memory) as we can. + */ + while (zs->avail_strm > num_strm && !list_empty(&zs->idle_strm)) { + zstrm = list_entry(zs->idle_strm.next, + struct zcomp_strm, list); + list_del(&zstrm->list); + zcomp_strm_free(comp, zstrm); + zs->avail_strm--; + } + spin_unlock(&zs->strm_lock); + return 0; +} + static void zcomp_strm_multi_destroy(struct zcomp *comp) { struct zcomp_strm_multi *zs = comp->stream; @@ -158,6 +181,7 @@ static int zcomp_strm_multi_create(struct zcomp *comp, int max_strm) comp->destroy = zcomp_strm_multi_destroy; comp->strm_find = zcomp_strm_multi_find; comp->strm_release = zcomp_strm_multi_release; + comp->set_max_streams = zcomp_strm_multi_set_max_streams; zs = kmalloc(sizeof(struct zcomp_strm_multi), GFP_KERNEL); if (!zs) return -ENOMEM; @@ -192,6 +216,12 @@ static void zcomp_strm_single_release(struct zcomp *comp, mutex_unlock(&zs->strm_lock); } +static int zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) +{ + /* zcomp_strm_single support only max_comp_streams == 1 */ + return -ENOTSUPP; +} + static void zcomp_strm_single_destroy(struct zcomp *comp) { struct zcomp_strm_single *zs = comp->stream; @@ -206,6 +236,7 @@ static int zcomp_strm_single_create(struct zcomp *comp) comp->destroy = zcomp_strm_single_destroy; comp->strm_find = zcomp_strm_single_find; comp->strm_release = zcomp_strm_single_release; + comp->set_max_streams = zcomp_strm_single_set_max_streams; zs = kmalloc(sizeof(struct zcomp_strm_single), GFP_KERNEL); if (!zs) return -ENOMEM; @@ -220,6 +251,11 @@ static int zcomp_strm_single_create(struct zcomp *comp) return 0; } +int zcomp_set_max_streams(struct zcomp *comp, int num_strm) +{ + return comp->set_max_streams(comp, num_strm); +} + struct zcomp_strm *zcomp_strm_find(struct zcomp *comp) { return comp->strm_find(comp); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 2a3684446160..bd11d59c5dd1 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -46,6 +46,7 @@ struct zcomp { struct zcomp_strm *(*strm_find)(struct zcomp *comp); void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); + int (*set_max_streams)(struct zcomp *comp, int num_strm); void (*destroy)(struct zcomp *comp); }; @@ -60,4 +61,6 @@ int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, int zcomp_decompress(struct zcomp *comp, const unsigned char *src, size_t src_len, unsigned char *dst); + +int zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 88ee317ce6d2..c03d0053309b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -133,9 +133,8 @@ static ssize_t max_comp_streams_store(struct device *dev, return -EINVAL; down_write(&zram->init_lock); if (init_done(zram)) { - up_write(&zram->init_lock); - pr_info("Can't set max_comp_streams for initialized device\n"); - return -EBUSY; + if (zcomp_set_max_streams(zram->comp, num)) + pr_info("Cannot change max compression streams\n"); } zram->max_comp_streams = num; up_write(&zram->init_lock); -- cgit v1.2.3 From 0626b80d185372c3b9f28f5c6c5e11647f381472 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:17 -0700 Subject: zram: make compression algorithm selection possible Add and document `comp_algorithm' device attribute. This attribute allows to show supported compression and currently selected compression algorithms: cat /sys/block/zram0/comp_algorithm [lzo] lz4 and change selected compression algorithm: echo lzo > /sys/block/zram0/comp_algorithm Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit e46b8a030d76d3c94156c545c3f4c3676d813435) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 32 +++++++++++++++++++++++++++++--- drivers/block/zram/zcomp.h | 2 ++ drivers/block/zram/zram_drv.c | 37 ++++++++++++++++++++++++++++++++++--- drivers/block/zram/zram_drv.h | 1 + 4 files changed, 66 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index ac276f79f21c..aad533a8bc55 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -39,11 +39,20 @@ struct zcomp_strm_multi { wait_queue_head_t strm_wait; }; +static struct zcomp_backend *backends[] = { + &zcomp_lzo, + NULL +}; + static struct zcomp_backend *find_backend(const char *compress) { - if (strncmp(compress, "lzo", 3) == 0) - return &zcomp_lzo; - return NULL; + int i = 0; + while (backends[i]) { + if (sysfs_streq(compress, backends[i]->name)) + break; + i++; + } + return backends[i]; } static void zcomp_strm_free(struct zcomp *comp, struct zcomp_strm *zstrm) @@ -251,6 +260,23 @@ static int zcomp_strm_single_create(struct zcomp *comp) return 0; } +/* show available compressors */ +ssize_t zcomp_available_show(const char *comp, char *buf) +{ + ssize_t sz = 0; + int i = 0; + + while (backends[i]) { + if (sysfs_streq(comp, backends[i]->name)) + sz += sprintf(buf + sz, "[%s] ", backends[i]->name); + else + sz += sprintf(buf + sz, "%s ", backends[i]->name); + i++; + } + sz += sprintf(buf + sz, "\n"); + return sz; +} + int zcomp_set_max_streams(struct zcomp *comp, int num_strm) { return comp->set_max_streams(comp, num_strm); diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index bd11d59c5dd1..8b8997f8613b 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -50,6 +50,8 @@ struct zcomp { void (*destroy)(struct zcomp *comp); }; +ssize_t zcomp_available_show(const char *comp, char *buf); + struct zcomp *zcomp_create(const char *comp, int max_strm); void zcomp_destroy(struct zcomp *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index c03d0053309b..f15564e65d87 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -141,6 +141,34 @@ static ssize_t max_comp_streams_store(struct device *dev, return len; } +static ssize_t comp_algorithm_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + size_t sz; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + sz = zcomp_available_show(zram->compressor, buf); + up_read(&zram->init_lock); + + return sz; +} + +static ssize_t comp_algorithm_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct zram *zram = dev_to_zram(dev); + down_write(&zram->init_lock); + if (init_done(zram)) { + up_write(&zram->init_lock); + pr_info("Can't change algorithm for initialized device\n"); + return -EBUSY; + } + strlcpy(zram->compressor, buf, sizeof(zram->compressor)); + up_write(&zram->init_lock); + return len; +} + /* flag operations needs meta->tb_lock */ static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) @@ -571,10 +599,10 @@ static ssize_t disksize_store(struct device *dev, goto out_free_meta; } - zram->comp = zcomp_create(default_compressor, zram->max_comp_streams); + zram->comp = zcomp_create(zram->compressor, zram->max_comp_streams); if (!zram->comp) { pr_info("Cannot initialise %s compressing backend\n", - default_compressor); + zram->compressor); err = -EINVAL; goto out_free_meta; } @@ -733,6 +761,8 @@ static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); +static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, + comp_algorithm_show, comp_algorithm_store); ZRAM_ATTR_RO(num_reads); ZRAM_ATTR_RO(num_writes); @@ -758,6 +788,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, &dev_attr_max_comp_streams.attr, + &dev_attr_comp_algorithm.attr, NULL, }; @@ -818,7 +849,7 @@ static int create_device(struct zram *zram, int device_id) pr_warn("Error creating sysfs group"); goto out_free_disk; } - + strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor)); zram->meta = NULL; zram->max_comp_streams = 1; return 0; diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index ccf36d11755a..7f21c145e317 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -101,5 +101,6 @@ struct zram { u64 disksize; /* bytes */ int max_comp_streams; struct zram_stats stats; + char compressor[10]; }; #endif -- cgit v1.2.3 From f84e7a4599807591a02c524314c1a78cc01ba41d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:18 -0700 Subject: zram: add lz4 algorithm backend Introduce LZ4 compression backend and make it available for selection. LZ4 support is optional and requires user to set ZRAM_LZ4_COMPRESS config option. The default compression backend is LZO. TEST (x86_64, core i5, 2 cores + 2 hyperthreading, zram disk size 1G, ext4 file system, 3 compression streams) iozone -t 3 -R -r 16K -s 60M -I +Z Test LZO LZ4 ---------------------------------------------- Initial write 1642744.62 1317005.09 Rewrite 2498980.88 1800645.16 Read 3957026.38 5877043.75 Re-read 3950997.38 5861847.00 Reverse Read 2937114.56 5047384.00 Stride read 2948163.19 4929587.38 Random read 3292692.69 4880793.62 Mixed workload 1545602.62 3502940.38 Random write 2448039.75 1758786.25 Pwrite 1670051.03 1338329.69 Pread 2530682.00 5097177.62 Fwrite 3232085.62 3275942.56 Fread 6306880.25 6645271.12 So on my system LZ4 is slower in write-only tests, while it performs better in read-only and mixed (reads + writes) tests. Official LZ4 benchmarks available here http://code.google.com/p/lz4/ (linux kernel uses revision r90). Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 6e76668e415adf799839f0ab205142ad7002d260) Signed-off-by: Alex Shi --- drivers/block/zram/Kconfig | 10 +++++++++ drivers/block/zram/Makefile | 2 ++ drivers/block/zram/zcomp.c | 6 ++++++ drivers/block/zram/zcomp_lz4.c | 47 ++++++++++++++++++++++++++++++++++++++++++ drivers/block/zram/zcomp_lz4.h | 17 +++++++++++++++ 5 files changed, 82 insertions(+) create mode 100644 drivers/block/zram/zcomp_lz4.c create mode 100644 drivers/block/zram/zcomp_lz4.h (limited to 'drivers') diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index 3450be850399..6489c0fd0ea6 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -15,6 +15,16 @@ config ZRAM See zram.txt for more information. +config ZRAM_LZ4_COMPRESS + bool "Enable LZ4 algorithm support" + depends on ZRAM + select LZ4_COMPRESS + select LZ4_DECOMPRESS + default n + help + This option enables LZ4 compression algorithm support. Compression + algorithm can be changed using `comp_algorithm' device attribute. + config ZRAM_DEBUG bool "Compressed RAM block device debug support" depends on ZRAM diff --git a/drivers/block/zram/Makefile b/drivers/block/zram/Makefile index 757c6a5cadff..be0763ff57a2 100644 --- a/drivers/block/zram/Makefile +++ b/drivers/block/zram/Makefile @@ -1,3 +1,5 @@ zram-y := zcomp_lzo.o zcomp.o zram_drv.o +zram-$(CONFIG_ZRAM_LZ4_COMPRESS) += zcomp_lz4.o + obj-$(CONFIG_ZRAM) += zram.o diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index aad533a8bc55..d5919031ca8b 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -15,6 +15,9 @@ #include "zcomp.h" #include "zcomp_lzo.h" +#ifdef CONFIG_ZRAM_LZ4_COMPRESS +#include "zcomp_lz4.h" +#endif /* * single zcomp_strm backend @@ -41,6 +44,9 @@ struct zcomp_strm_multi { static struct zcomp_backend *backends[] = { &zcomp_lzo, +#ifdef CONFIG_ZRAM_LZ4_COMPRESS + &zcomp_lz4, +#endif NULL }; diff --git a/drivers/block/zram/zcomp_lz4.c b/drivers/block/zram/zcomp_lz4.c new file mode 100644 index 000000000000..f2afb7e988c3 --- /dev/null +++ b/drivers/block/zram/zcomp_lz4.c @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include "zcomp_lz4.h" + +static void *zcomp_lz4_create(void) +{ + return kzalloc(LZ4_MEM_COMPRESS, GFP_KERNEL); +} + +static void zcomp_lz4_destroy(void *private) +{ + kfree(private); +} + +static int zcomp_lz4_compress(const unsigned char *src, unsigned char *dst, + size_t *dst_len, void *private) +{ + /* return : Success if return 0 */ + return lz4_compress(src, PAGE_SIZE, dst, dst_len, private); +} + +static int zcomp_lz4_decompress(const unsigned char *src, size_t src_len, + unsigned char *dst) +{ + size_t dst_len = PAGE_SIZE; + /* return : Success if return 0 */ + return lz4_decompress_unknownoutputsize(src, src_len, dst, &dst_len); +} + +struct zcomp_backend zcomp_lz4 = { + .compress = zcomp_lz4_compress, + .decompress = zcomp_lz4_decompress, + .create = zcomp_lz4_create, + .destroy = zcomp_lz4_destroy, + .name = "lz4", +}; diff --git a/drivers/block/zram/zcomp_lz4.h b/drivers/block/zram/zcomp_lz4.h new file mode 100644 index 000000000000..60613fb29dd8 --- /dev/null +++ b/drivers/block/zram/zcomp_lz4.h @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2014 Sergey Senozhatsky. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _ZCOMP_LZ4_H_ +#define _ZCOMP_LZ4_H_ + +#include "zcomp.h" + +extern struct zcomp_backend zcomp_lz4; + +#endif /* _ZCOMP_LZ4_H_ */ -- cgit v1.2.3 From c7bb5623e03444502f37821e1a391ef61828d4c3 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:19 -0700 Subject: zram: move comp allocation out of init_lock While fixing lockdep spew of ->init_lock reported by Sasha Levin [1], Minchan Kim noted [2] that it's better to move compression backend allocation (using GPF_KERNEL) out of the ->init_lock lock, same way as with zram_meta_alloc(), in order to prevent the same lockdep spew. [1] https://lkml.org/lkml/2014/2/27/337 [2] https://lkml.org/lkml/2014/3/3/32 Signed-off-by: Sergey Senozhatsky Reported-by: Minchan Kim Acked-by: Minchan Kim Cc: Sasha Levin Acked-by: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d61f98c70e8b0d324e8e83be2ed546d6295e63f3) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index f15564e65d87..fe0daa9fe59e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -579,9 +579,10 @@ static ssize_t disksize_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { u64 disksize; + struct zcomp *comp; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); - int err; + int err = -EINVAL; disksize = memparse(buf, NULL); if (!disksize) @@ -592,30 +593,32 @@ static ssize_t disksize_store(struct device *dev, if (!meta) return -ENOMEM; + comp = zcomp_create(zram->compressor, zram->max_comp_streams); + if (!comp) { + pr_info("Cannot initialise %s compressing backend\n", + zram->compressor); + goto out_cleanup; + } + down_write(&zram->init_lock); if (init_done(zram)) { + up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); err = -EBUSY; - goto out_free_meta; - } - - zram->comp = zcomp_create(zram->compressor, zram->max_comp_streams); - if (!zram->comp) { - pr_info("Cannot initialise %s compressing backend\n", - zram->compressor); - err = -EINVAL; - goto out_free_meta; + goto out_cleanup; } zram->meta = meta; + zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); return len; -out_free_meta: - up_write(&zram->init_lock); +out_cleanup: + if (comp) + zcomp_destroy(comp); zram_meta_free(meta); return err; } -- cgit v1.2.3 From 6fef1131ae139299f9a45924ef866ede77c5c500 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:20 -0700 Subject: zram: return error-valued pointer from zcomp_create() Instead of returning just NULL, return ERR_PTR from zcomp_create() if compressing backend creation has failed. ERR_PTR(-EINVAL) for unsupported compression algorithm request, ERR_PTR(-ENOMEM) for allocation (zcomp or compression stream) error. Perform IS_ERR() check of returned from zcomp_create() value in disksize_store() and set return code to PTR_ERR(). Change suggested by Jerome Marchand. [akpm@linux-foundation.org: clean up error recovery flow] Signed-off-by: Sergey Senozhatsky Reported-by: Jerome Marchand Cc: Minchan Kim Cc: Nitin Gupta Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit fcfa8d95cacf5cbbe6dee6b8d229fe86142266e0) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 14 ++++++++------ drivers/block/zram/zram_drv.c | 19 ++++++++++--------- 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index d5919031ca8b..5647d8fe1dc1 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -319,9 +320,10 @@ void zcomp_destroy(struct zcomp *comp) /* * search available compressors for requested algorithm. - * allocate new zcomp and initialize it. return NULL - * if requested algorithm is not supported or in case - * of init error + * allocate new zcomp and initialize it. return compressing + * backend pointer or ERR_PTR if things went bad. ERR_PTR(-EINVAL) + * if requested algorithm is not supported, ERR_PTR(-ENOMEM) in + * case of allocation error. */ struct zcomp *zcomp_create(const char *compress, int max_strm) { @@ -330,11 +332,11 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) backend = find_backend(compress); if (!backend) - return NULL; + return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL); if (!comp) - return NULL; + return ERR_PTR(-ENOMEM); comp->backend = backend; if (max_strm > 1) @@ -343,7 +345,7 @@ struct zcomp *zcomp_create(const char *compress, int max_strm) zcomp_strm_single_create(comp); if (!comp->stream) { kfree(comp); - return NULL; + return ERR_PTR(-ENOMEM); } return comp; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index fe0daa9fe59e..407f541b26a1 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "zram_drv.h" @@ -582,7 +583,7 @@ static ssize_t disksize_store(struct device *dev, struct zcomp *comp; struct zram_meta *meta; struct zram *zram = dev_to_zram(dev); - int err = -EINVAL; + int err; disksize = memparse(buf, NULL); if (!disksize) @@ -594,18 +595,18 @@ static ssize_t disksize_store(struct device *dev, return -ENOMEM; comp = zcomp_create(zram->compressor, zram->max_comp_streams); - if (!comp) { + if (IS_ERR(comp)) { pr_info("Cannot initialise %s compressing backend\n", zram->compressor); - goto out_cleanup; + err = PTR_ERR(comp); + goto out_free_meta; } down_write(&zram->init_lock); if (init_done(zram)) { - up_write(&zram->init_lock); pr_info("Cannot change disksize for initialized device\n"); err = -EBUSY; - goto out_cleanup; + goto out_destroy_comp; } zram->meta = meta; @@ -613,12 +614,12 @@ static ssize_t disksize_store(struct device *dev, zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); up_write(&zram->init_lock); - return len; -out_cleanup: - if (comp) - zcomp_destroy(comp); +out_destroy_comp: + up_write(&zram->init_lock); + zcomp_destroy(comp); +out_free_meta: zram_meta_free(meta); return err; } -- cgit v1.2.3 From 1b2dc1d89b1a5bff45782c9ddb6e0d384c7c6420 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Mon, 7 Apr 2014 15:38:21 -0700 Subject: zram: propagate error to user When we initialized zcomp with single, we couldn't change max_comp_streams without zram reset but current interface doesn't show any error to user and even it changes max_comp_streams's value without any effect so it would make user very confusing. This patch prevents max_comp_streams's change when zcomp was initialized as single zcomp and emit the error to user(ex, echo). [akpm@linux-foundation.org: don't return with the lock held, per Sergey] [fengguang.wu@intel.com: fix coccinelle warnings] Signed-off-by: Minchan Kim Cc: Nitin Gupta Cc: Jerome Marchand Acked-by: Sergey Senozhatsky Signed-off-by: Fengguang Wu Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 60a726e33375a1096e85399cfa1327081b4c38be) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 10 +++++----- drivers/block/zram/zcomp.h | 4 ++-- drivers/block/zram/zram_drv.c | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 5647d8fe1dc1..b0e7592c44d8 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -153,7 +153,7 @@ static void zcomp_strm_multi_release(struct zcomp *comp, struct zcomp_strm *zstr } /* change max_strm limit */ -static int zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) +static bool zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) { struct zcomp_strm_multi *zs = comp->stream; struct zcomp_strm *zstrm; @@ -172,7 +172,7 @@ static int zcomp_strm_multi_set_max_streams(struct zcomp *comp, int num_strm) zs->avail_strm--; } spin_unlock(&zs->strm_lock); - return 0; + return true; } static void zcomp_strm_multi_destroy(struct zcomp *comp) @@ -232,10 +232,10 @@ static void zcomp_strm_single_release(struct zcomp *comp, mutex_unlock(&zs->strm_lock); } -static int zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) +static bool zcomp_strm_single_set_max_streams(struct zcomp *comp, int num_strm) { /* zcomp_strm_single support only max_comp_streams == 1 */ - return -ENOTSUPP; + return false; } static void zcomp_strm_single_destroy(struct zcomp *comp) @@ -284,7 +284,7 @@ ssize_t zcomp_available_show(const char *comp, char *buf) return sz; } -int zcomp_set_max_streams(struct zcomp *comp, int num_strm) +bool zcomp_set_max_streams(struct zcomp *comp, int num_strm) { return comp->set_max_streams(comp, num_strm); } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 8b8997f8613b..c59d1fca72c0 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -46,7 +46,7 @@ struct zcomp { struct zcomp_strm *(*strm_find)(struct zcomp *comp); void (*strm_release)(struct zcomp *comp, struct zcomp_strm *zstrm); - int (*set_max_streams)(struct zcomp *comp, int num_strm); + bool (*set_max_streams)(struct zcomp *comp, int num_strm); void (*destroy)(struct zcomp *comp); }; @@ -64,5 +64,5 @@ int zcomp_compress(struct zcomp *comp, struct zcomp_strm *zstrm, int zcomp_decompress(struct zcomp *comp, const unsigned char *src, size_t src_len, unsigned char *dst); -int zcomp_set_max_streams(struct zcomp *comp, int num_strm); +bool zcomp_set_max_streams(struct zcomp *comp, int num_strm); #endif /* _ZCOMP_H_ */ diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 407f541b26a1..27da5967b57b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -127,19 +127,28 @@ static ssize_t max_comp_streams_store(struct device *dev, { int num; struct zram *zram = dev_to_zram(dev); + int ret; - if (kstrtoint(buf, 0, &num)) - return -EINVAL; + ret = kstrtoint(buf, 0, &num); + if (ret < 0) + return ret; if (num < 1) return -EINVAL; + down_write(&zram->init_lock); if (init_done(zram)) { - if (zcomp_set_max_streams(zram->comp, num)) + if (!zcomp_set_max_streams(zram->comp, num)) { pr_info("Cannot change max compression streams\n"); + ret = -EINVAL; + goto out; + } } + zram->max_comp_streams = num; + ret = len; +out: up_write(&zram->init_lock); - return len; + return ret; } static ssize_t comp_algorithm_show(struct device *dev, -- cgit v1.2.3 From 575ef7bc322df6e0384ef661f510928bfa1dab81 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Mon, 7 Apr 2014 15:38:22 -0700 Subject: zram: use scnprintf() in attrs show() methods sysfs.txt documentation lists the following requirements: - The buffer will always be PAGE_SIZE bytes in length. On i386, this is 4096. - show() methods should return the number of bytes printed into the buffer. This is the return value of scnprintf(). - show() should always use scnprintf(). Use scnprintf() in show() functions. Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 56b4e8cb85827a2ccc4752a2a7148e56b62b7e96) Signed-off-by: Alex Shi --- drivers/block/zram/zcomp.c | 8 +++++--- drivers/block/zram/zram_drv.c | 12 ++++++------ 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index b0e7592c44d8..f1ff39a3d1c1 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -275,12 +275,14 @@ ssize_t zcomp_available_show(const char *comp, char *buf) while (backends[i]) { if (sysfs_streq(comp, backends[i]->name)) - sz += sprintf(buf + sz, "[%s] ", backends[i]->name); + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "[%s] ", backends[i]->name); else - sz += sprintf(buf + sz, "%s ", backends[i]->name); + sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, + "%s ", backends[i]->name); i++; } - sz += sprintf(buf + sz, "\n"); + sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n"); return sz; } diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 27da5967b57b..031598bc14b4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -48,7 +48,7 @@ static ssize_t zram_attr_##name##_show(struct device *d, \ struct device_attribute *attr, char *b) \ { \ struct zram *zram = dev_to_zram(d); \ - return sprintf(b, "%llu\n", \ + return scnprintf(b, PAGE_SIZE, "%llu\n", \ (u64)atomic64_read(&zram->stats.name)); \ } \ static struct device_attribute dev_attr_##name = \ @@ -69,7 +69,7 @@ static ssize_t disksize_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%llu\n", zram->disksize); + return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize); } static ssize_t initstate_show(struct device *dev, @@ -82,7 +82,7 @@ static ssize_t initstate_show(struct device *dev, val = init_done(zram); up_read(&zram->init_lock); - return sprintf(buf, "%u\n", val); + return scnprintf(buf, PAGE_SIZE, "%u\n", val); } static ssize_t orig_data_size_show(struct device *dev, @@ -90,7 +90,7 @@ static ssize_t orig_data_size_show(struct device *dev, { struct zram *zram = dev_to_zram(dev); - return sprintf(buf, "%llu\n", + return scnprintf(buf, PAGE_SIZE, "%llu\n", (u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT); } @@ -106,7 +106,7 @@ static ssize_t mem_used_total_show(struct device *dev, val = zs_get_total_size_bytes(meta->mem_pool); up_read(&zram->init_lock); - return sprintf(buf, "%llu\n", val); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); } static ssize_t max_comp_streams_show(struct device *dev, @@ -119,7 +119,7 @@ static ssize_t max_comp_streams_show(struct device *dev, val = zram->max_comp_streams; up_read(&zram->init_lock); - return sprintf(buf, "%d\n", val); + return scnprintf(buf, PAGE_SIZE, "%d\n", val); } static ssize_t max_comp_streams_store(struct device *dev, -- cgit v1.2.3 From ba6d7663fa3c9b310f33b9b8a18743af8b3727c9 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 7 Apr 2014 15:38:24 -0700 Subject: zram: support REQ_DISCARD zram is ram based block device and can be used by backend of filesystem. When filesystem deletes a file, it normally doesn't do anything on data block of that file. It just marks on metadata of that file. This behavior has no problem on disk based block device, but has problems on ram based block device, since we can't free memory used for data block. To overcome this disadvantage, there is REQ_DISCARD functionality. If block device support REQ_DISCARD and filesystem is mounted with discard option, filesystem sends REQ_DISCARD to block device whenever some data blocks are discarded. All we have to do is to handle this request. This patch implements to flag up QUEUE_FLAG_DISCARD and handle this REQ_DISCARD request. With it, we can free memory used by zram if it isn't used. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Joonsoo Kim Cc: Minchan Kim Cc: Nitin Gupta Cc: Sergey Senozhatsky Cc: Jerome Marchand Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit f4659d8e620d08bd1a84a8aec5d2f5294a242764) Signed-off-by: Alex Shi Conflicts: drivers/block/zram/zram_drv.c Conflicts solution: keep use old bio struct, and bio_for_each_segment() --- drivers/block/zram/zram_drv.c | 62 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 031598bc14b4..19cf51ad48ef 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -550,6 +550,47 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, return ret; } +/* + * zram_bio_discard - handler on discard request + * @index: physical block index in PAGE_SIZE units + * @offset: byte offset within physical block + */ +static void zram_bio_discard(struct zram *zram, u32 index, + int offset, struct bio *bio) +{ + size_t n = bio->bi_size; + + /* + * zram manages data in physical block size units. Because logical block + * size isn't identical with physical block size on some arch, we + * could get a discard request pointing to a specific offset within a + * certain physical block. Although we can handle this request by + * reading that physiclal block and decompressing and partially zeroing + * and re-compressing and then re-storing it, this isn't reasonable + * because our intent with a discard request is to save memory. So + * skipping this logical block is appropriate here. + */ + if (offset) { + if (n < offset) + return; + + n -= offset; + index++; + } + + while (n >= PAGE_SIZE) { + /* + * Discard request can be large so the lock hold times could be + * lengthy. So take the lock once per page. + */ + write_lock(&zram->meta->tb_lock); + zram_free_page(zram, index); + write_unlock(&zram->meta->tb_lock); + index++; + n -= PAGE_SIZE; + } +} + static void zram_reset_device(struct zram *zram, bool reset_capacity) { size_t index; @@ -684,6 +725,12 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT; offset = (bio->bi_sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT; + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + zram_bio_discard(zram, index, offset, bio); + bio_endio(bio, 0); + return; + } + bio_for_each_segment(bvec, bio, i) { int max_transfer_size = PAGE_SIZE - offset; @@ -853,6 +900,21 @@ static int create_device(struct zram *zram, int device_id) ZRAM_LOGICAL_BLOCK_SIZE); blk_queue_io_min(zram->disk->queue, PAGE_SIZE); blk_queue_io_opt(zram->disk->queue, PAGE_SIZE); + zram->disk->queue->limits.discard_granularity = PAGE_SIZE; + zram->disk->queue->limits.max_discard_sectors = UINT_MAX; + /* + * zram_bio_discard() will clear all logical blocks if logical block + * size is identical with physical block size(PAGE_SIZE). But if it is + * different, we will skip discarding some parts of logical blocks in + * the part of the request range which isn't aligned to physical block + * size. So we can't ensure that all discarded logical blocks are + * zeroed. + */ + if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE) + zram->disk->queue->limits.discard_zeroes_data = 1; + else + zram->disk->queue->limits.discard_zeroes_data = 0; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue); add_disk(zram->disk); -- cgit v1.2.3 From ce58d51a0e9b366876270f03555c209917d3b80d Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 4 Jun 2014 16:11:06 -0700 Subject: zram: correct offset usage in zram_bio_discard We want to skip the physical block(PAGE_SIZE) which is partially covered by the discard bio, so we check the remaining size and subtract it if there is a need to goto the next physical block. The current offset usage in zram_bio_discard is incorrect, it will cause its upper filesystem breakdown. Consider the following scenario: On some architecture or config, PAGE_SIZE is 64K for example, filesystem is set up on zram disk without PAGE_SIZE aligned, a discard bio leads to a offset = 4K and size=72K, normally, it should not really discard any physical block as it partially cover two physical blocks. However, with the current offset usage, it will discard the second physical block and free its memory, which will cause filesystem breakdown. This patch corrects the offset usage in zram_bio_discard. Signed-off-by: Weijie Yang Cc: Minchan Kim Cc: Nitin Gupta Acked-by: Joonsoo Kim Cc: Sergey Senozhatsky Cc: Bob Liu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 38515c73398a4c58059ecf1087e844561b58ee0f) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 19cf51ad48ef..efb6ff2a3735 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -571,10 +571,10 @@ static void zram_bio_discard(struct zram *zram, u32 index, * skipping this logical block is appropriate here. */ if (offset) { - if (n < offset) + if (n <= (PAGE_SIZE - offset)) return; - n -= offset; + n -= (PAGE_SIZE - offset); index++; } -- cgit v1.2.3 From 888210267e226f437abb7fd0604530032a98cafa Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 2 Jul 2014 15:22:36 -0700 Subject: zram: revalidate disk after capacity change Alexander reported mkswap on /dev/zram0 is failed if other process is opening the block device file. Step is as follows, 0. Reset the unused zram device. 1. Use a program that opens /dev/zram0 with O_RDWR and sleeps until killed. 2. While that program sleeps, echo the correct value to /sys/block/zram0/disksize. 3. Verify (e.g. in /proc/partitions) that the disk size is applied correctly. It is. 4. While that program still sleeps, attempt to mkswap /dev/zram0. This fails: mkswap: error: swap area needs to be at least 40 KiB When I investigated, the size get by ioctl(fd, BLKGETSIZE64, xxx) on mkswap to get a size of blockdev was zero although zram0 has right size by 2. The reason is zram didn't revalidate disk after changing capacity so that size of blockdev's inode is not uptodate until all of file is close. This patch should fix the BUG. Signed-off-by: Minchan Kim Reported-by: Alexander E. Patrakov Tested-by: Alexander E. Patrakov Reviewed-by: Sergey Senozhatsky Cc: Nitin Gupta Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 2e32baea46ce542c561a519414c840295b229c8f) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index efb6ff2a3735..1e14825bd02b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -621,8 +621,10 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) + if (reset_capacity) { set_capacity(zram->disk, 0); + revalidate_disk(zram->disk); + } up_write(&zram->init_lock); } @@ -663,6 +665,7 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); + revalidate_disk(zram->disk); up_write(&zram->init_lock); return len; -- cgit v1.2.3 From d022fbe2a0a481cb0d0c41fcacc47d62c3a16581 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 23 Jul 2014 14:00:04 -0700 Subject: zram: avoid lockdep splat by revalidate_disk Sasha reported lockdep warning [1] introduced by [2]. It could be fixed by doing disk revalidation out of the init_lock. It's okay because disk capacity change is protected by init_lock so that revalidate_disk always sees up-to-date value so there is no race. [1] https://lkml.org/lkml/2014/7/3/735 [2] zram: revalidate disk after capacity change Fixes 2e32baea46ce ("zram: revalidate disk after capacity change"). Signed-off-by: Minchan Kim Reported-by: Sasha Levin Cc: "Alexander E. Patrakov" Cc: Nitin Gupta Cc: Jerome Marchand Cc: Sergey Senozhatsky CC: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit b4c5c60920e3b0c4598f43e7317559f6aec51531) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 1e14825bd02b..674b8517694d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -621,11 +621,18 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) { + if (reset_capacity) set_capacity(zram->disk, 0); - revalidate_disk(zram->disk); - } + up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + if (reset_capacity) + revalidate_disk(zram->disk); } static ssize_t disksize_store(struct device *dev, @@ -665,8 +672,15 @@ static ssize_t disksize_store(struct device *dev, zram->comp = comp; zram->disksize = disksize; set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT); - revalidate_disk(zram->disk); up_write(&zram->init_lock); + + /* + * Revalidate disk out of the init_lock to avoid lockdep splat. + * It's okay because disk's capacity is protected by init_lock + * so that revalidate_disk always sees up-to-date capacity. + */ + revalidate_disk(zram->disk); + return len; out_destroy_comp: -- cgit v1.2.3 From e11cb6668f1a349250873337858a502e2ab73c8e Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:08:25 -0700 Subject: zram: rename struct `table' to `zram_table_entry' Andrew Morton has recently noted that `struct table' actually represents table entry and, thus, should be renamed. Rename to `zram_table_entry'. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit cb8f2eec3c5c87e31219c5e58625b8e890004e48) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 7f21c145e317..8909f86caf0d 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -62,7 +62,7 @@ enum zram_pageflags { /*-- Data structures */ /* Allocated for each disk page */ -struct table { +struct zram_table_entry { unsigned long handle; u16 size; /* object size (excluding header) */ u8 flags; @@ -82,7 +82,7 @@ struct zram_stats { struct zram_meta { rwlock_t tb_lock; /* protect table */ - struct table *table; + struct zram_table_entry *table; struct zs_pool *mem_pool; }; -- cgit v1.2.3 From 61febc60cc77c7e06ec7d457f21fc9ce9885f7e8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Wed, 6 Aug 2014 16:08:27 -0700 Subject: zram: remove unused SECTOR_SIZE define Drop SECTOR_SIZE define, because it's not used. Signed-off-by: Sergey Senozhatsky Cc: Minchan Kim Cc: Nitin Gupta Cc: Weijie Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit a830eff749eb2bf906783f6bf74a74dad3de3aea) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 8909f86caf0d..c8161bd8969c 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -43,7 +43,6 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /*-- End of configurable params */ #define SECTOR_SHIFT 9 -#define SECTOR_SIZE (1 << SECTOR_SHIFT) #define SECTORS_PER_PAGE_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) #define SECTORS_PER_PAGE (1 << SECTORS_PER_PAGE_SHIFT) #define ZRAM_LOGICAL_BLOCK_SHIFT 12 -- cgit v1.2.3 From 244e41f94febd73d877a397a05e626563ebd2ceb Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 6 Aug 2014 16:08:29 -0700 Subject: zram: use size_t instead of u16 Some architectures (eg, hexagon and PowerPC) could use PAGE_SHIFT of 16 or more. In these cases u16 is not sufficiently large to represent a compressed page's size so use size_t. Signed-off-by: Minchan Kim Reported-by: Weijie Yang Acked-by: Sergey Senozhatsky Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 023b409f9dac4cdea3322009f2e592068558690c) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 674b8517694d..efd9e4f1605c 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -336,7 +336,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned char *cmem; struct zram_meta *meta = zram->meta; unsigned long handle; - u16 size; + size_t size; read_lock(&meta->tb_lock); handle = meta->table[index].handle; -- cgit v1.2.3 From 3a4d2c69adaee7391fcdbc782913629df2e49726 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 6 Aug 2014 16:08:31 -0700 Subject: zram: replace global tb_lock with fine grain lock Currently, we use a rwlock tb_lock to protect concurrent access to the whole zram meta table. However, according to the actual access model, there is only a small chance for upper user to access the same table[index], so the current lock granularity is too big. The idea of optimization is to change the lock granularity from whole meta table to per table entry (table -> table[index]), so that we can protect concurrent access to the same table[index], meanwhile allow the maximum concurrency. With this in mind, several kinds of locks which could be used as a per-entry lock were tested and compared: Test environment: x86-64 Intel Core2 Q8400, system memory 4GB, Ubuntu 12.04, kernel v3.15.0-rc3 as base, zram with 4 max_comp_streams LZO. iozone test: iozone -t 4 -R -r 16K -s 200M -I +Z (1GB zram with ext4 filesystem, take the average of 10 tests, KB/s) Test base CAS spinlock rwlock bit_spinlock ------------------------------------------------------------------- Initial write 1381094 1425435 1422860 1423075 1421521 Rewrite 1529479 1641199 1668762 1672855 1654910 Read 8468009 11324979 11305569 11117273 10997202 Re-read 8467476 11260914 11248059 11145336 10906486 Reverse Read 6821393 8106334 8282174 8279195 8109186 Stride read 7191093 8994306 9153982 8961224 9004434 Random read 7156353 8957932 9167098 8980465 8940476 Mixed workload 4172747 5680814 5927825 5489578 5972253 Random write 1483044 1605588 1594329 1600453 1596010 Pwrite 1276644 1303108 1311612 1314228 1300960 Pread 4324337 4632869 4618386 4457870 4500166 To enhance the possibility of access the same table[index] concurrently, set zram a small disksize(10MB) and let threads run with large loop count. fio test: fio --bs=32k --randrepeat=1 --randseed=100 --refill_buffers --scramble_buffers=1 --direct=1 --loops=3000 --numjobs=4 --filename=/dev/zram0 --name=seq-write --rw=write --stonewall --name=seq-read --rw=read --stonewall --name=seq-readwrite --rw=rw --stonewall --name=rand-readwrite --rw=randrw --stonewall (10MB zram raw block device, take the average of 10 tests, KB/s) Test base CAS spinlock rwlock bit_spinlock ------------------------------------------------------------- seq-write 933789 999357 1003298 995961 1001958 seq-read 5634130 6577930 6380861 6243912 6230006 seq-rw 1405687 1638117 1640256 1633903 1634459 rand-rw 1386119 1614664 1617211 1609267 1612471 All the optimization methods show a higher performance than the base, however, it is hard to say which method is the most appropriate. On the other hand, zram is mostly used on small embedded system, so we don't want to increase any memory footprint. This patch pick the bit_spinlock method, pack object size and page_flag into an unsigned long table.value, so as to not increase any memory overhead on both 32-bit and 64-bit system. On the third hand, even though different kinds of locks have different performances, we can ignore this difference, because: if zram is used as zram swapfile, the swap subsystem can prevent concurrent access to the same swapslot; if zram is used as zram-blk for set up filesystem on it, the upper filesystem and the page cache also prevent concurrent access of the same block mostly. So we can ignore the different performances among locks. Acked-by: Sergey Senozhatsky Reviewed-by: Davidlohr Bueso Signed-off-by: Weijie Yang Signed-off-by: Minchan Kim Cc: Jerome Marchand Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d2d5e762c8990c4031890e03565983a05febd64a) Signed-off-by: Alex Shi Conflicts: drivers/block/zram/zram_drv.c Conflicts solution: using old bio struct --- drivers/block/zram/zram_drv.c | 69 ++++++++++++++++++++++++++----------------- drivers/block/zram/zram_drv.h | 24 +++++++++++---- 2 files changed, 60 insertions(+), 33 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index efd9e4f1605c..a21f466c4e15 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -183,19 +183,32 @@ static ssize_t comp_algorithm_store(struct device *dev, static int zram_test_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - return meta->table[index].flags & BIT(flag); + return meta->table[index].value & BIT(flag); } static void zram_set_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - meta->table[index].flags |= BIT(flag); + meta->table[index].value |= BIT(flag); } static void zram_clear_flag(struct zram_meta *meta, u32 index, enum zram_pageflags flag) { - meta->table[index].flags &= ~BIT(flag); + meta->table[index].value &= ~BIT(flag); +} + +static size_t zram_get_obj_size(struct zram_meta *meta, u32 index) +{ + return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1); +} + +static void zram_set_obj_size(struct zram_meta *meta, + u32 index, size_t size) +{ + unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT; + + meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size; } static inline int is_partial_io(struct bio_vec *bvec) @@ -254,7 +267,6 @@ static struct zram_meta *zram_meta_alloc(u64 disksize) goto free_table; } - rwlock_init(&meta->tb_lock); return meta; free_table: @@ -303,7 +315,12 @@ static void handle_zero_page(struct bio_vec *bvec) flush_dcache_page(page); } -/* NOTE: caller should hold meta->tb_lock with write-side */ + +/* + * To protect concurrent access to the same index entry, + * caller should hold this table index entry's bit_spinlock to + * indicate this index entry is accessing. + */ static void zram_free_page(struct zram *zram, size_t index) { struct zram_meta *meta = zram->meta; @@ -323,11 +340,12 @@ static void zram_free_page(struct zram *zram, size_t index) zs_free(meta->mem_pool, handle); - atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size); + atomic64_sub(zram_get_obj_size(meta, index), + &zram->stats.compr_data_size); atomic64_dec(&zram->stats.pages_stored); meta->table[index].handle = 0; - meta->table[index].size = 0; + zram_set_obj_size(meta, index, 0); } static int zram_decompress_page(struct zram *zram, char *mem, u32 index) @@ -338,12 +356,12 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) unsigned long handle; size_t size; - read_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); handle = meta->table[index].handle; - size = meta->table[index].size; + size = zram_get_obj_size(meta, index); if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) { - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); clear_page(mem); return 0; } @@ -354,7 +372,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) else ret = zcomp_decompress(zram->comp, cmem, size, mem); zs_unmap_object(meta->mem_pool, handle); - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret)) { @@ -375,14 +393,14 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, struct zram_meta *meta = zram->meta; page = bvec->bv_page; - read_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); if (unlikely(!meta->table[index].handle) || zram_test_flag(meta, index, ZRAM_ZERO)) { - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); handle_zero_page(bvec); return 0; } - read_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); if (is_partial_io(bvec)) /* Use a temporary buffer to decompress the page */ @@ -460,10 +478,10 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, if (page_zero_filled(uncmem)) { kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); zram_set_flag(meta, index, ZRAM_ZERO); - write_unlock(&zram->meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); atomic64_inc(&zram->stats.zero_pages); ret = 0; @@ -513,12 +531,12 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, * Free memory associated with this sector * before overwriting unused sectors. */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); meta->table[index].handle = handle; - meta->table[index].size = clen; - write_unlock(&zram->meta->tb_lock); + zram_set_obj_size(meta, index, clen); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); /* Update stats */ atomic64_add(clen, &zram->stats.compr_data_size); @@ -559,6 +577,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, int offset, struct bio *bio) { size_t n = bio->bi_size; + struct zram_meta *meta = zram->meta; /* * zram manages data in physical block size units. Because logical block @@ -579,13 +598,9 @@ static void zram_bio_discard(struct zram *zram, u32 index, } while (n >= PAGE_SIZE) { - /* - * Discard request can be large so the lock hold times could be - * lengthy. So take the lock once per page. - */ - write_lock(&zram->meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - write_unlock(&zram->meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); index++; n -= PAGE_SIZE; } @@ -819,9 +834,9 @@ static void zram_slot_free_notify(struct block_device *bdev, zram = bdev->bd_disk->private_data; meta = zram->meta; - write_lock(&meta->tb_lock); + bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); - write_unlock(&meta->tb_lock); + bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); atomic64_inc(&zram->stats.notify_free); } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c8161bd8969c..5b0afde729cd 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -50,10 +50,24 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; #define ZRAM_SECTOR_PER_LOGICAL_BLOCK \ (1 << (ZRAM_LOGICAL_BLOCK_SHIFT - SECTOR_SHIFT)) -/* Flags for zram pages (table[page_no].flags) */ + +/* + * The lower ZRAM_FLAG_SHIFT bits of table.value is for + * object size (excluding header), the higher bits is for + * zram_pageflags. + * + * zram is mainly used for memory efficiency so we want to keep memory + * footprint small so we can squeeze size and flags into a field. + * The lower ZRAM_FLAG_SHIFT bits is for object size (excluding header), + * the higher bits is for zram_pageflags. + */ +#define ZRAM_FLAG_SHIFT 24 + +/* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* Page consists entirely of zeros */ - ZRAM_ZERO, + ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1, + ZRAM_ACCESS, /* page in now accessed */ __NR_ZRAM_PAGEFLAGS, }; @@ -63,9 +77,8 @@ enum zram_pageflags { /* Allocated for each disk page */ struct zram_table_entry { unsigned long handle; - u16 size; /* object size (excluding header) */ - u8 flags; -} __aligned(4); + unsigned long value; +}; struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ @@ -80,7 +93,6 @@ struct zram_stats { }; struct zram_meta { - rwlock_t tb_lock; /* protect table */ struct zram_table_entry *table; struct zs_pool *mem_pool; }; -- cgit v1.2.3 From 548233701e2be93471a703b490b6702c000d60bc Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 29 Aug 2014 15:18:37 -0700 Subject: zram: fix incorrect stat with failed_reads Since we allocate a temporary buffer in zram_bvec_read to handle partial page operations in commit 924bd88d703e ("Staging: zram: allow partial page operations"), our ->failed_reads value may be incorrect as we do not increase its value when failing to allocate the temporary buffer. Let's fix this issue and correct the annotation of failed_reads. Signed-off-by: Chao Yu Acked-by: Minchan Kim Cc: Nitin Gupta Acked-by: Jerome Marchand Acked-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 0cf1e9d6c34d4c82ac3af8015594849814843d36) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 10 +++++++--- drivers/block/zram/zram_drv.h | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index a21f466c4e15..600aeab83d2a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -377,7 +377,6 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index) /* Should NEVER happen. Return bio error if it does. */ if (unlikely(ret)) { pr_err("Decompression failed! err=%d, page=%u\n", ret, index); - atomic64_inc(&zram->stats.failed_reads); return ret; } @@ -546,8 +545,6 @@ out: zcomp_strm_release(zram->comp, zstrm); if (is_partial_io(bvec)) kfree(uncmem); - if (ret) - atomic64_inc(&zram->stats.failed_writes); return ret; } @@ -565,6 +562,13 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, ret = zram_bvec_write(zram, bvec, index, offset); } + if (unlikely(ret)) { + if (rw == READ) + atomic64_inc(&zram->stats.failed_reads); + else + atomic64_inc(&zram->stats.failed_writes); + } + return ret; } diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index 5b0afde729cd..e0f725c87cc6 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -84,7 +84,7 @@ struct zram_stats { atomic64_t compr_data_size; /* compressed size of pages stored */ atomic64_t num_reads; /* failed + successful */ atomic64_t num_writes; /* --do-- */ - atomic64_t failed_reads; /* should NEVER! happen */ + atomic64_t failed_reads; /* can happen when memory is too low */ atomic64_t failed_writes; /* can happen when memory is too low */ atomic64_t invalid_io; /* non-page-aligned I/O requests */ atomic64_t notify_free; /* no. of swap slot free notifications */ -- cgit v1.2.3 From 70a5d237c239ddde3c055784980635e39bd93885 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:50 -0700 Subject: zsmalloc: change return value unit of zs_get_total_size_bytes zs_get_total_size_bytes returns a amount of memory zsmalloc consumed with *byte unit* but zsmalloc operates *page unit* rather than byte unit so let's change the API so benefit we could get is that reduce unnecessary overhead (ie, change page unit with byte unit) in zsmalloc. Since return type is pages, "zs_get_total_pages" is better than "zs_get_total_size_bytes". Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Cc: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 722cdc17232f0f684011407f7cf3c40d39457971) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 600aeab83d2a..d0717743e2df 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev, down_read(&zram->init_lock); if (init_done(zram)) - val = zs_get_total_size_bytes(meta->mem_pool); + val = zs_get_total_pages(meta->mem_pool); up_read(&zram->init_lock); - return scnprintf(buf, PAGE_SIZE, "%llu\n", val); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); } static ssize_t max_comp_streams_show(struct device *dev, -- cgit v1.2.3 From 3c854b64635888527504dfc898687ab10dad6191 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:53 -0700 Subject: zram: zram memory size limitation Since zram has no control feature to limit memory usage, it makes hard to manage system memrory. This patch adds new knob "mem_limit" via sysfs to set up the a limit so that zram could fail allocation once it reaches the limit. In addition, user could change the limit in runtime so that he could manage the memory more dynamically. Initial state is no limit so it doesn't break old behavior. [akpm@linux-foundation.org: fix typo, per Sergey] Signed-off-by: Minchan Kim Cc: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Cc: David Horner Cc: Joonsoo Kim Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 9ada9da9573f3460b156b7755c093e30b258eacb) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 45 +++++++++++++++++++++++++++++++++++++++++++ drivers/block/zram/zram_drv.h | 5 +++++ 2 files changed, 50 insertions(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index d0717743e2df..3f4da06c89c0 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -122,6 +122,37 @@ static ssize_t max_comp_streams_show(struct device *dev, return scnprintf(buf, PAGE_SIZE, "%d\n", val); } +static ssize_t mem_limit_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + val = zram->limit_pages; + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); +} + +static ssize_t mem_limit_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + u64 limit; + char *tmp; + struct zram *zram = dev_to_zram(dev); + + limit = memparse(buf, &tmp); + if (buf == tmp) /* no chars parsed, invalid input */ + return -EINVAL; + + down_write(&zram->init_lock); + zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT; + up_write(&zram->init_lock); + + return len; +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -512,6 +543,14 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, ret = -ENOMEM; goto out; } + + if (zram->limit_pages && + zs_get_total_pages(meta->mem_pool) > zram->limit_pages) { + zs_free(meta->mem_pool, handle); + ret = -ENOMEM; + goto out; + } + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { @@ -616,6 +655,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) struct zram_meta *meta; down_write(&zram->init_lock); + + zram->limit_pages = 0; + if (!init_done(zram)) { up_write(&zram->init_lock); return; @@ -855,6 +897,8 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL); static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store); static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); +static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, + mem_limit_store); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, @@ -883,6 +927,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_orig_data_size.attr, &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, + &dev_attr_mem_limit.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, NULL, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e0f725c87cc6..b7aa9c21553f 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -112,6 +112,11 @@ struct zram { u64 disksize; /* bytes */ int max_comp_streams; struct zram_stats stats; + /* + * the number of pages zram can consume for storing compressed data + */ + unsigned long limit_pages; + char compressor[10]; }; #endif -- cgit v1.2.3 From cda6b06454ce34ec784cc45c427eb4b0131581e1 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 9 Oct 2014 15:29:55 -0700 Subject: zram: report maximum used memory Normally, zram user could get maximum memory usage zram consumed via polling mem_used_total with sysfs in userspace. But it has a critical problem because user can miss peak memory usage during update inverval of polling. For avoiding that, user should poll it with shorter interval(ie, 0.0000000001s) with mlocking to avoid page fault delay when memory pressure is heavy. It would be troublesome. This patch adds new knob "mem_used_max" so user could see the maximum memory usage easily via reading the knob and reset it via "echo 0 > /sys/block/zram0/mem_used_max". Signed-off-by: Minchan Kim Reviewed-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Jerome Marchand Cc: Cc: Cc: Luigi Semenzato Cc: Nitin Gupta Cc: Seth Jennings Reviewed-by: David Horner Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 461a8eee6af3b55745be64bea403ed0b743563cf) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 60 +++++++++++++++++++++++++++++++++++++++++-- drivers/block/zram/zram_drv.h | 1 + 2 files changed, 59 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3f4da06c89c0..204b7fa9d78e 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -153,6 +153,41 @@ static ssize_t mem_limit_store(struct device *dev, return len; } +static ssize_t mem_used_max_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u64 val = 0; + struct zram *zram = dev_to_zram(dev); + + down_read(&zram->init_lock); + if (init_done(zram)) + val = atomic_long_read(&zram->stats.max_used_pages); + up_read(&zram->init_lock); + + return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); +} + +static ssize_t mem_used_max_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + int err; + unsigned long val; + struct zram *zram = dev_to_zram(dev); + struct zram_meta *meta = zram->meta; + + err = kstrtoul(buf, 10, &val); + if (err || val != 0) + return -EINVAL; + + down_read(&zram->init_lock); + if (init_done(zram)) + atomic_long_set(&zram->stats.max_used_pages, + zs_get_total_pages(meta->mem_pool)); + up_read(&zram->init_lock); + + return len; +} + static ssize_t max_comp_streams_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { @@ -464,6 +499,21 @@ out_cleanup: return ret; } +static inline void update_used_max(struct zram *zram, + const unsigned long pages) +{ + int old_max, cur_max; + + old_max = atomic_long_read(&zram->stats.max_used_pages); + + do { + cur_max = old_max; + if (pages > cur_max) + old_max = atomic_long_cmpxchg( + &zram->stats.max_used_pages, cur_max, pages); + } while (old_max != cur_max); +} + static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, int offset) { @@ -475,6 +525,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, struct zram_meta *meta = zram->meta; struct zcomp_strm *zstrm; bool locked = false; + unsigned long alloced_pages; page = bvec->bv_page; if (is_partial_io(bvec)) { @@ -544,13 +595,15 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, goto out; } - if (zram->limit_pages && - zs_get_total_pages(meta->mem_pool) > zram->limit_pages) { + alloced_pages = zs_get_total_pages(meta->mem_pool); + if (zram->limit_pages && alloced_pages > zram->limit_pages) { zs_free(meta->mem_pool, handle); ret = -ENOMEM; goto out; } + update_used_max(zram, alloced_pages); + cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO); if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) { @@ -899,6 +952,8 @@ static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL); static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL); static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show, mem_limit_store); +static DEVICE_ATTR(mem_used_max, S_IRUGO | S_IWUSR, mem_used_max_show, + mem_used_max_store); static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR, max_comp_streams_show, max_comp_streams_store); static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR, @@ -928,6 +983,7 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_compr_data_size.attr, &dev_attr_mem_used_total.attr, &dev_attr_mem_limit.attr, + &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, NULL, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index b7aa9c21553f..c6ee271317f5 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -90,6 +90,7 @@ struct zram_stats { atomic64_t notify_free; /* no. of swap slot free notifications */ atomic64_t zero_pages; /* no. of zero filled pages */ atomic64_t pages_stored; /* no. of pages currently stored */ + atomic_long_t max_used_pages; /* no. of maximum pages stored */ }; struct zram_meta { -- cgit v1.2.3 From c85f1d31057bdde75716b19e6456a91c5c2273da Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 9 Oct 2014 15:29:57 -0700 Subject: zram: use notify_free to account all free notifications `notify_free' device attribute accounts the number of slot free notifications and internally represents the number of zram_free_page() calls. Slot free notifications are sent only when device is used as a swap device, hence `notify_free' is used only for swap devices. Since f4659d8e620d08 (zram: support REQ_DISCARD) ZRAM handles yet another one free notification (also via zram_free_page() call) -- REQ_DISCARD requests, which are sent by a filesystem, whenever some data blocks are discarded. However, there is no way to know the number of notifications in the latter case. Use `notify_free' to account the number of pages freed by zram_bio_discard() and zram_slot_free_notify(). Depending on usage scenario `notify_free' represents: a) the number of pages freed because of slot free notifications, which is equal to the number of swap_slot_free_notify() calls, so there is no behaviour change b) the number of pages freed because of REQ_DISCARD notifications Signed-off-by: Sergey Senozhatsky Acked-by: Minchan Kim Acked-by: Jerome Marchand Cc: Nitin Gupta Cc: Chao Yu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 015254daf1753003c19c46b90ee85a963260d270) Signed-off-by: Alex Shi Conflicts: Documentation/ABI/testing/sysfs-block-zram --- drivers/block/zram/zram_drv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 204b7fa9d78e..3503019a9672 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -697,6 +697,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value); + atomic64_inc(&zram->stats.notify_free); index++; n -= PAGE_SIZE; } -- cgit v1.2.3 From ea56241654a2c7ef9a1122a7ad5c751527ed9b17 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Wed, 29 Oct 2014 14:50:57 -0700 Subject: zram: avoid NULL pointer access in concurrent situation There is a rare NULL pointer bug in mem_used_total_show() and mem_used_max_store() in concurrent situation, like this: zram is not initialized, process A is a mem_used_total reader which runs periodically, while process B try to init zram. process A process B access meta, get a NULL value init zram, done init_done() is true access meta->mem_pool, get a NULL pointer BUG This patch fixes this issue. Signed-off-by: Weijie Yang Acked-by: Minchan Kim Acked-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 5a99e95b8d1cd47f6feddcdca6c71d22060df8a2) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3503019a9672..bf9fea268db4 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -99,11 +99,12 @@ static ssize_t mem_used_total_show(struct device *dev, { u64 val = 0; struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; down_read(&zram->init_lock); - if (init_done(zram)) + if (init_done(zram)) { + struct zram_meta *meta = zram->meta; val = zs_get_total_pages(meta->mem_pool); + } up_read(&zram->init_lock); return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT); @@ -173,16 +174,17 @@ static ssize_t mem_used_max_store(struct device *dev, int err; unsigned long val; struct zram *zram = dev_to_zram(dev); - struct zram_meta *meta = zram->meta; err = kstrtoul(buf, 10, &val); if (err || val != 0) return -EINVAL; down_read(&zram->init_lock); - if (init_done(zram)) + if (init_done(zram)) { + struct zram_meta *meta = zram->meta; atomic_long_set(&zram->stats.max_used_pages, zs_get_total_pages(meta->mem_pool)); + } up_read(&zram->init_lock); return len; -- cgit v1.2.3 From 7aecdc886e2e9bacfaf2377067072fa63d649b21 Mon Sep 17 00:00:00 2001 From: Weijie Yang Date: Thu, 13 Nov 2014 15:19:05 -0800 Subject: zram: avoid kunmap_atomic() of a NULL pointer zram could kunmap_atomic() a NULL pointer in a rare situation: a zram page becomes a full-zeroed page after a partial write io. The current code doesn't handle this case and performs kunmap_atomic() on a NULL pointer, which panics the kernel. This patch fixes this issue. Signed-off-by: Weijie Yang Cc: Sergey Senozhatsky Cc: Dan Streetman Cc: Nitin Gupta Cc: Weijie Yang Acked-by: Jerome Marchand Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit c406515239376fc93a30d5d03192182160cbd3fb) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index bf9fea268db4..8a1266ce8bbe 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -559,7 +559,8 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index, } if (page_zero_filled(uncmem)) { - kunmap_atomic(user_mem); + if (user_mem) + kunmap_atomic(user_mem); /* Free memory associated with this sector now. */ bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value); zram_free_page(zram, index); -- cgit v1.2.3 From 60acd81baccac53be98e07c140a0579a3607be9d Mon Sep 17 00:00:00 2001 From: Mahendran Ganesh Date: Fri, 12 Dec 2014 16:57:04 -0800 Subject: mm/zram: correct ZRAM_ZERO flag bit position In struct zram_table_entry, the element *value* contains obj size and obj zram flags. Bit 0 to bit (ZRAM_FLAG_SHIFT - 1) represent obj size, and bit ZRAM_FLAG_SHIFT to the highest bit of unsigned long represent obj zram_flags. So the first zram flag(ZRAM_ZERO) should be from ZRAM_FLAG_SHIFT instead of (ZRAM_FLAG_SHIFT + 1). This patch fixes this cosmetic issue. Also fix a typo, "page in now accessed" -> "page is now accessed" Signed-off-by: Mahendran Ganesh Acked-by: Minchan Kim Acked-by: Weijie Yang Acked-by: Sergey Senozhatsky Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit d49b1c254c997195872a9e8913660a788298921e) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index c6ee271317f5..b05a816b09ac 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -66,8 +66,8 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { /* Page consists entirely of zeros */ - ZRAM_ZERO = ZRAM_FLAG_SHIFT + 1, - ZRAM_ACCESS, /* page in now accessed */ + ZRAM_ZERO = ZRAM_FLAG_SHIFT, + ZRAM_ACCESS, /* page is now accessed */ __NR_ZRAM_PAGEFLAGS, }; -- cgit v1.2.3 From 837e91c8966924792bf5ba5ef9a62eaa55489ff7 Mon Sep 17 00:00:00 2001 From: Ganesh Mahendran Date: Thu, 12 Feb 2015 15:00:33 -0800 Subject: zram: free meta table in zram_meta_free zram_meta_alloc() and zram_meta_free() are a pair. In zram_meta_alloc(), meta table is allocated. So it it better to free it in zram_meta_free(). Signed-off-by: Ganesh Mahendran Acked-by: Minchan Kim Acked-by: Sergey Senozhatsky Cc: Nitin Gupta Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit 1fec117281d9f5349c35279c9521f4096fa33357) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 8a1266ce8bbe..3e075d65a158 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -308,8 +308,21 @@ static inline int valid_io_request(struct zram *zram, struct bio *bio) return 1; } -static void zram_meta_free(struct zram_meta *meta) +static void zram_meta_free(struct zram_meta *meta, u64 disksize) { + size_t num_pages = disksize >> PAGE_SHIFT; + size_t index; + + /* Free all pages that are still in this zram device */ + for (index = 0; index < num_pages; index++) { + unsigned long handle = meta->table[index].handle; + + if (!handle) + continue; + + zs_free(meta->mem_pool, handle); + } + zs_destroy_pool(meta->mem_pool); vfree(meta->table); kfree(meta); @@ -708,9 +721,6 @@ static void zram_bio_discard(struct zram *zram, u32 index, static void zram_reset_device(struct zram *zram, bool reset_capacity) { - size_t index; - struct zram_meta *meta; - down_write(&zram->init_lock); zram->limit_pages = 0; @@ -720,20 +730,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) return; } - meta = zram->meta; - /* Free all pages that are still in this zram device */ - for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) { - unsigned long handle = meta->table[index].handle; - if (!handle) - continue; - - zs_free(meta->mem_pool, handle); - } - zcomp_destroy(zram->comp); zram->max_comp_streams = 1; - - zram_meta_free(zram->meta); + zram_meta_free(zram->meta, zram->disksize); zram->meta = NULL; /* Reset stats */ memset(&zram->stats, 0, sizeof(zram->stats)); @@ -805,7 +804,7 @@ out_destroy_comp: up_write(&zram->init_lock); zcomp_destroy(comp); out_free_meta: - zram_meta_free(meta); + zram_meta_free(meta, disksize); return err; } -- cgit v1.2.3 From 043787104c5b1386a96c317f2e82ecff257dd9c5 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky Date: Thu, 12 Feb 2015 15:00:36 -0800 Subject: zram: fix umount-reset_store-mount race condition Ganesh Mahendran was the first one who proposed to use bdev->bd_mutex to avoid ->bd_holders race condition: CPU0 CPU1 umount /* zram->init_done is true */ reset_store() bdev->bd_holders == 0 mount ... zram_make_request() zram_reset_device() However, his solution required some considerable amount of code movement, which we can avoid. Apart from using bdev->bd_mutex in reset_store(), this patch also simplifies zram_reset_device(). zram_reset_device() has a bool parameter reset_capacity which tells it whether disk capacity and itself disk should be reset. There are two zram_reset_device() callers: -- zram_exit() passes reset_capacity=false -- reset_store() passes reset_capacity=true So we can move reset_capacity-sensitive work out of zram_reset_device() and perform it unconditionally in reset_store(). This also lets us drop reset_capacity parameter from zram_reset_device() and pass zram pointer only. Signed-off-by: Sergey Senozhatsky Reported-by: Ganesh Mahendran Cc: Minchan Kim Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds (cherry picked from commit ba6b17d68c8e3aa8d55d0474299cb931965c5ea5) Signed-off-by: Alex Shi --- drivers/block/zram/zram_drv.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3e075d65a158..45e2e85815ab 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -719,7 +719,7 @@ static void zram_bio_discard(struct zram *zram, u32 index, } } -static void zram_reset_device(struct zram *zram, bool reset_capacity) +static void zram_reset_device(struct zram *zram) { down_write(&zram->init_lock); @@ -738,18 +738,7 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity) memset(&zram->stats, 0, sizeof(zram->stats)); zram->disksize = 0; - if (reset_capacity) - set_capacity(zram->disk, 0); - up_write(&zram->init_lock); - - /* - * Revalidate disk out of the init_lock to avoid lockdep splat. - * It's okay because disk's capacity is protected by init_lock - * so that revalidate_disk always sees up-to-date capacity. - */ - if (reset_capacity) - revalidate_disk(zram->disk); } static ssize_t disksize_store(struct device *dev, @@ -822,6 +811,7 @@ static ssize_t reset_store(struct device *dev, if (!bdev) return -ENOMEM; + mutex_lock(&bdev->bd_mutex); /* Do not reset an active device! */ if (bdev->bd_holders) { ret = -EBUSY; @@ -839,12 +829,17 @@ static ssize_t reset_store(struct device *dev, /* Make sure all pending I/O is finished */ fsync_bdev(bdev); + zram_reset_device(zram); + set_capacity(zram->disk, 0); + + mutex_unlock(&bdev->bd_mutex); + revalidate_disk(zram->disk); bdput(bdev); - zram_reset_device(zram, true); return len; out: + mutex_unlock(&bdev->bd_mutex); bdput(bdev); return ret; } @@ -1147,7 +1142,7 @@ static void __exit zram_exit(void) * Shouldn't access zram->disk after destroy_device * because destroy_device already released zram->disk. */ - zram_reset_device(zram, false); + zram_reset_device(zram); } unregister_blkdev(zram_major, "zram"); -- cgit v1.2.3