aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/ext4.h7
-rw-r--r--fs/ext4/extents_status.c156
-rw-r--r--fs/ext4/extents_status.h5
-rw-r--r--fs/ext4/super.c7
-rw-r--r--include/trace/events/ext4.h60
5 files changed, 235 insertions, 0 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c565c941f7..6e16c186795 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -888,6 +888,8 @@ struct ext4_inode_info {
/* extents status tree */
struct ext4_es_tree i_es_tree;
rwlock_t i_es_lock;
+ struct list_head i_es_lru;
+ unsigned int i_es_lru_nr; /* protected by i_es_lock */
/* ialloc */
ext4_group_t i_last_alloc_group;
@@ -1303,6 +1305,11 @@ struct ext4_sb_info {
/* Precomputed FS UUID checksum for seeding other checksums */
__u32 s_csum_seed;
+
+ /* Reclaim extents from extent status tree */
+ struct shrinker s_es_shrinker;
+ struct list_head s_es_lru;
+ spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
};
static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index cce152c3c8d..9f1380e0547 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -145,6 +145,9 @@ static struct kmem_cache *ext4_es_cachep;
static int __es_insert_extent(struct inode *inode, struct extent_status *newes);
static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end);
+static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+ int nr_to_scan);
+static int ext4_es_reclaim_extents_count(struct super_block *sb);
int __init ext4_init_es(void)
{
@@ -280,6 +283,7 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
trace_ext4_es_find_delayed_extent_exit(inode, es);
}
@@ -294,11 +298,24 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
es->es_lblk = lblk;
es->es_len = len;
es->es_pblk = pblk;
+
+ /*
+ * We don't count delayed extent because we never try to reclaim them
+ */
+ if (!ext4_es_is_delayed(es))
+ EXT4_I(inode)->i_es_lru_nr++;
+
return es;
}
static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
{
+ /* Decrease the lru counter when this es is not delayed */
+ if (!ext4_es_is_delayed(es)) {
+ BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
+ EXT4_I(inode)->i_es_lru_nr--;
+ }
+
kmem_cache_free(ext4_es_cachep, es);
}
@@ -456,6 +473,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
error:
write_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
ext4_es_print_tree(inode);
return err;
@@ -517,6 +535,7 @@ out:
read_unlock(&EXT4_I(inode)->i_es_lock);
+ ext4_es_lru_add(inode);
trace_ext4_es_lookup_extent_exit(inode, es, found);
return found;
}
@@ -639,3 +658,140 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_es_print_tree(inode);
return err;
}
+
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct ext4_sb_info *sbi = container_of(shrink,
+ struct ext4_sb_info, s_es_shrinker);
+ struct ext4_inode_info *ei;
+ struct list_head *cur, *tmp, scanned;
+ int nr_to_scan = sc->nr_to_scan;
+ int ret, nr_shrunk = 0;
+
+ trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
+
+ if (!nr_to_scan)
+ return ext4_es_reclaim_extents_count(sbi->s_sb);
+
+ INIT_LIST_HEAD(&scanned);
+
+ spin_lock(&sbi->s_es_lru_lock);
+ list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
+ list_move_tail(cur, &scanned);
+
+ ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+
+ read_lock(&ei->i_es_lock);
+ if (ei->i_es_lru_nr == 0) {
+ read_unlock(&ei->i_es_lock);
+ continue;
+ }
+ read_unlock(&ei->i_es_lock);
+
+ write_lock(&ei->i_es_lock);
+ ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
+ write_unlock(&ei->i_es_lock);
+
+ nr_shrunk += ret;
+ nr_to_scan -= ret;
+ if (nr_to_scan == 0)
+ break;
+ }
+ list_splice_tail(&scanned, &sbi->s_es_lru);
+ spin_unlock(&sbi->s_es_lru_lock);
+ trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
+
+ return ext4_es_reclaim_extents_count(sbi->s_sb);
+}
+
+void ext4_es_register_shrinker(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi;
+
+ sbi = EXT4_SB(sb);
+ INIT_LIST_HEAD(&sbi->s_es_lru);
+ spin_lock_init(&sbi->s_es_lru_lock);
+ sbi->s_es_shrinker.shrink = ext4_es_shrink;
+ sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
+ register_shrinker(&sbi->s_es_shrinker);
+}
+
+void ext4_es_unregister_shrinker(struct super_block *sb)
+{
+ unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker);
+}
+
+void ext4_es_lru_add(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ spin_lock(&sbi->s_es_lru_lock);
+ if (list_empty(&ei->i_es_lru))
+ list_add_tail(&ei->i_es_lru, &sbi->s_es_lru);
+ else
+ list_move_tail(&ei->i_es_lru, &sbi->s_es_lru);
+ spin_unlock(&sbi->s_es_lru_lock);
+}
+
+void ext4_es_lru_del(struct inode *inode)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+ spin_lock(&sbi->s_es_lru_lock);
+ if (!list_empty(&ei->i_es_lru))
+ list_del_init(&ei->i_es_lru);
+ spin_unlock(&sbi->s_es_lru_lock);
+}
+
+static int ext4_es_reclaim_extents_count(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_inode_info *ei;
+ struct list_head *cur;
+ int nr_cached = 0;
+
+ spin_lock(&sbi->s_es_lru_lock);
+ list_for_each(cur, &sbi->s_es_lru) {
+ ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
+ read_lock(&ei->i_es_lock);
+ nr_cached += ei->i_es_lru_nr;
+ read_unlock(&ei->i_es_lock);
+ }
+ spin_unlock(&sbi->s_es_lru_lock);
+ trace_ext4_es_reclaim_extents_count(sb, nr_cached);
+ return nr_cached;
+}
+
+static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
+ int nr_to_scan)
+{
+ struct inode *inode = &ei->vfs_inode;
+ struct ext4_es_tree *tree = &ei->i_es_tree;
+ struct rb_node *node;
+ struct extent_status *es;
+ int nr_shrunk = 0;
+
+ if (ei->i_es_lru_nr == 0)
+ return 0;
+
+ node = rb_first(&tree->root);
+ while (node != NULL) {
+ es = rb_entry(node, struct extent_status, rb_node);
+ node = rb_next(&es->rb_node);
+ /*
+ * We can't reclaim delayed extent from status tree because
+ * fiemap, bigallic, and seek_data/hole need to use it.
+ */
+ if (!ext4_es_is_delayed(es)) {
+ rb_erase(&es->rb_node, &tree->root);
+ ext4_es_free_extent(inode, es);
+ nr_shrunk++;
+ if (--nr_to_scan == 0)
+ break;
+ }
+ }
+ tree->cache_es = NULL;
+ return nr_shrunk;
+}
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 8ffc90c784f..cf83e77b16c 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -106,4 +106,9 @@ static inline void ext4_es_store_status(struct extent_status *es,
es->es_pblk = block;
}
+extern void ext4_es_register_shrinker(struct super_block *sb);
+extern void ext4_es_unregister_shrinker(struct super_block *sb);
+extern void ext4_es_lru_add(struct inode *inode);
+extern void ext4_es_lru_del(struct inode *inode);
+
#endif /* _EXT4_EXTENTS_STATUS_H */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d80bfe5ac11..373d46cd5d3 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -755,6 +755,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_abort(sb, "Couldn't clean up the journal");
}
+ ext4_es_unregister_shrinker(sb);
del_timer(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
@@ -840,6 +841,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_prealloc_lock);
ext4_es_init_tree(&ei->i_es_tree);
rwlock_init(&ei->i_es_lock);
+ INIT_LIST_HEAD(&ei->i_es_lru);
+ ei->i_es_lru_nr = 0;
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
@@ -928,6 +931,7 @@ void ext4_clear_inode(struct inode *inode)
dquot_drop(inode);
ext4_discard_preallocations(inode);
ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
+ ext4_es_lru_del(inode);
if (EXT4_I(inode)->jinode) {
jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
EXT4_I(inode)->jinode);
@@ -3693,6 +3697,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_max_writeback_mb_bump = 128;
sbi->s_extent_max_zeroout_kb = 32;
+ /* Register extent status tree shrinker */
+ ext4_es_register_shrinker(sb);
+
/*
* set up enough so that it can read an inode
*/
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 1e590b68cec..c0457c0d1a6 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -2255,6 +2255,66 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
__entry->found ? __entry->status : 0)
);
+TRACE_EVENT(ext4_es_reclaim_extents_count,
+ TP_PROTO(struct super_block *sb, int nr_cached),
+
+ TP_ARGS(sb, nr_cached),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, nr_cached )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->nr_cached = nr_cached;
+ ),
+
+ TP_printk("dev %d,%d cached objects nr %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_cached)
+);
+
+TRACE_EVENT(ext4_es_shrink_enter,
+ TP_PROTO(struct super_block *sb, int nr_to_scan),
+
+ TP_ARGS(sb, nr_to_scan),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, nr_to_scan )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->nr_to_scan = nr_to_scan;
+ ),
+
+ TP_printk("dev %d,%d nr to scan %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->nr_to_scan)
+);
+
+TRACE_EVENT(ext4_es_shrink_exit,
+ TP_PROTO(struct super_block *sb, int shrunk_nr),
+
+ TP_ARGS(sb, shrunk_nr),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, shrunk_nr )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sb->s_dev;
+ __entry->shrunk_nr = shrunk_nr;
+ ),
+
+ TP_printk("dev %d,%d nr to scan %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->shrunk_nr)
+);
+
#endif /* _TRACE_EXT4_H */
/* This part must be outside protection */