From 063c4561f52a74de686fe0ff2f96f4f54c9fecd2 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 3 Jul 2007 13:34:11 -0700 Subject: ocfs2: support for removing file regions Provide an internal interface for the removal of arbitrary file regions. ocfs2_remove_inode_range() takes a byte range within a file and will remove existing extents within that range. Partial clusters will be zeroed so that any read from within the region will return zeros. Signed-off-by: Mark Fasheh --- fs/ocfs2/alloc.c | 20 ++--- fs/ocfs2/alloc.h | 10 +++ fs/ocfs2/file.c | 242 ++++++++++++++++++++++++++++++++++++++++++++++++++++- fs/ocfs2/journal.h | 2 + 4 files changed, 262 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index df186d2e8248..f5e11f4fa952 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -4373,10 +4373,10 @@ out: return ret; } -static int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, - u32 cpos, u32 len, handle_t *handle, - struct ocfs2_alloc_context *meta_ac, - struct ocfs2_cached_dealloc_ctxt *dealloc) +int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, + u32 cpos, u32 len, handle_t *handle, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_cached_dealloc_ctxt *dealloc) { int ret, index; u32 rec_range, trunc_range; @@ -4506,7 +4506,7 @@ out: return ret; } -static inline int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) +int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb) { struct buffer_head *tl_bh = osb->osb_tl_bh; struct ocfs2_dinode *di; @@ -4539,10 +4539,10 @@ static int ocfs2_truncate_log_can_coalesce(struct ocfs2_truncate_log *tl, return current_tail == new_start; } -static int ocfs2_truncate_log_append(struct ocfs2_super *osb, - handle_t *handle, - u64 start_blk, - unsigned int num_clusters) +int ocfs2_truncate_log_append(struct ocfs2_super *osb, + handle_t *handle, + u64 start_blk, + unsigned int num_clusters) { int status, index; unsigned int start_cluster, tl_count; @@ -4698,7 +4698,7 @@ bail: } /* Expects you to already be holding tl_inode->i_mutex */ -static int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) +int __ocfs2_flush_truncate_log(struct ocfs2_super *osb) { int status; unsigned int num_to_flush; diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 752ef860873d..990df48ae8d3 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -41,6 +41,10 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, handle_t *handle, u32 cpos, u32 len, u32 phys, struct ocfs2_alloc_context *meta_ac, struct ocfs2_cached_dealloc_ctxt *dealloc); +int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, + u32 cpos, u32 len, handle_t *handle, + struct ocfs2_alloc_context *meta_ac, + struct ocfs2_cached_dealloc_ctxt *dealloc); int ocfs2_num_free_extents(struct ocfs2_super *osb, struct inode *inode, struct ocfs2_dinode *fe); @@ -68,6 +72,12 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, struct ocfs2_dinode **tl_copy); int ocfs2_complete_truncate_log_recovery(struct ocfs2_super *osb, struct ocfs2_dinode *tl_copy); +int ocfs2_truncate_log_needs_flush(struct ocfs2_super *osb); +int ocfs2_truncate_log_append(struct ocfs2_super *osb, + handle_t *handle, + u64 start_blk, + unsigned int num_clusters); +int __ocfs2_flush_truncate_log(struct ocfs2_super *osb); /* * Process local structure which describes the block unlinks done diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index f0a6b1330a6e..11f7cf9f2511 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -541,12 +541,15 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, struct ocfs2_alloc_context **data_ac, struct ocfs2_alloc_context **meta_ac) { - int ret, num_free_extents; + int ret = 0, num_free_extents; unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); *meta_ac = NULL; - *data_ac = NULL; + if (data_ac) + *data_ac = NULL; + + BUG_ON(clusters_to_add != 0 && data_ac == NULL); mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " "clusters_to_add = %u, extents_to_split = %u\n", @@ -583,6 +586,9 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, } } + if (clusters_to_add == 0) + goto out; + ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); if (ret < 0) { if (ret != -ENOSPC) @@ -1252,6 +1258,238 @@ out: return ret; } +static int __ocfs2_remove_inode_range(struct inode *inode, + struct buffer_head *di_bh, + u32 cpos, u32 phys_cpos, u32 len, + struct ocfs2_cached_dealloc_ctxt *dealloc) +{ + int ret; + u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct inode *tl_inode = osb->osb_tl_inode; + handle_t *handle; + struct ocfs2_alloc_context *meta_ac = NULL; + struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; + + ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); + if (ret) { + mlog_errno(ret); + return ret; + } + + mutex_lock(&tl_inode->i_mutex); + + if (ocfs2_truncate_log_needs_flush(osb)) { + ret = __ocfs2_flush_truncate_log(osb); + if (ret < 0) { + mlog_errno(ret); + goto out; + } + } + + handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); + if (handle == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + ret = ocfs2_journal_access(handle, inode, di_bh, + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out; + } + + ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, + dealloc); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + OCFS2_I(inode)->ip_clusters -= len; + di->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); + + ret = ocfs2_journal_dirty(handle, di_bh); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + + ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); + if (ret) + mlog_errno(ret); + +out_commit: + ocfs2_commit_trans(osb, handle); +out: + mutex_unlock(&tl_inode->i_mutex); + + if (meta_ac) + ocfs2_free_alloc_context(meta_ac); + + return ret; +} + +/* + * Truncate a byte range, avoiding pages within partial clusters. This + * preserves those pages for the zeroing code to write to. + */ +static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, + u64 byte_len) +{ + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + loff_t start, end; + struct address_space *mapping = inode->i_mapping; + + start = (loff_t)ocfs2_align_bytes_to_clusters(inode->i_sb, byte_start); + end = byte_start + byte_len; + end = end & ~(osb->s_clustersize - 1); + + if (start < end) { + unmap_mapping_range(mapping, start, end - start, 0); + truncate_inode_pages_range(mapping, start, end - 1); + } +} + +static int ocfs2_zero_partial_clusters(struct inode *inode, + u64 start, u64 len) +{ + int ret = 0; + u64 tmpend, end = start + len; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + unsigned int csize = osb->s_clustersize; + handle_t *handle; + + /* + * The "start" and "end" values are NOT necessarily part of + * the range whose allocation is being deleted. Rather, this + * is what the user passed in with the request. We must zero + * partial clusters here. There's no need to worry about + * physical allocation - the zeroing code knows to skip holes. + */ + mlog(0, "byte start: %llu, end: %llu\n", + (unsigned long long)start, (unsigned long long)end); + + /* + * If both edges are on a cluster boundary then there's no + * zeroing required as the region is part of the allocation to + * be truncated. + */ + if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) + goto out; + + handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); + if (handle == NULL) { + ret = -ENOMEM; + mlog_errno(ret); + goto out; + } + + /* + * We want to get the byte offset of the end of the 1st cluster. + */ + tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1)); + if (tmpend > end) + tmpend = end; + + mlog(0, "1st range: start: %llu, tmpend: %llu\n", + (unsigned long long)start, (unsigned long long)tmpend); + + ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend); + if (ret) + mlog_errno(ret); + + if (tmpend < end) { + /* + * This may make start and end equal, but the zeroing + * code will skip any work in that case so there's no + * need to catch it up here. + */ + start = end & ~(osb->s_clustersize - 1); + + mlog(0, "2nd range: start: %llu, end: %llu\n", + (unsigned long long)start, (unsigned long long)end); + + ret = ocfs2_zero_range_for_truncate(inode, handle, start, end); + if (ret) + mlog_errno(ret); + } + + ocfs2_commit_trans(osb, handle); +out: + return ret; +} + +static int ocfs2_remove_inode_range(struct inode *inode, + struct buffer_head *di_bh, u64 byte_start, + u64 byte_len) +{ + int ret = 0; + u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_cached_dealloc_ctxt dealloc; + + ocfs2_init_dealloc_ctxt(&dealloc); + + if (byte_len == 0) + return 0; + + trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); + trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; + if (trunc_len >= trunc_start) + trunc_len -= trunc_start; + else + trunc_len = 0; + + mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno, + (unsigned long long)byte_start, + (unsigned long long)byte_len, trunc_start, trunc_len); + + ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); + if (ret) { + mlog_errno(ret); + goto out; + } + + cpos = trunc_start; + while (trunc_len) { + ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, + &alloc_size, NULL); + if (ret) { + mlog_errno(ret); + goto out; + } + + if (alloc_size > trunc_len) + alloc_size = trunc_len; + + /* Only do work for non-holes */ + if (phys_cpos != 0) { + ret = __ocfs2_remove_inode_range(inode, di_bh, cpos, + phys_cpos, alloc_size, + &dealloc); + if (ret) { + mlog_errno(ret); + goto out; + } + } + + cpos += alloc_size; + trunc_len -= alloc_size; + } + + ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); + +out: + ocfs2_schedule_truncate_log_flush(osb, 1); + ocfs2_run_deallocs(osb, &dealloc); + + return ret; +} + static int ocfs2_prepare_inode_for_write(struct dentry *dentry, loff_t *ppos, size_t count, diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 3db5de4506da..ce60aab013aa 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -289,6 +289,8 @@ int ocfs2_journal_dirty_data(handle_t *handle, #define OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC (OCFS2_SUBALLOC_FREE \ + OCFS2_TRUNCATE_LOG_UPDATE) +#define OCFS2_REMOVE_EXTENT_CREDITS (OCFS2_TRUNCATE_LOG_UPDATE + OCFS2_INODE_UPDATE_CREDITS) + /* data block for new dir/symlink, 2 for bitmap updates (bitmap fe + * bitmap block for the new bit) */ #define OCFS2_DIR_LINK_ADDITIONAL_CREDITS (1 + 2) -- cgit v1.2.3