From 7c0ef28a2c9a768ffb63c1c3d9542b6e175ab260 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 3 Jul 2013 15:13:32 +0100 Subject: GFS2: Move gfs2_meta_sync to lops.c Since gfs2_meta_sync() is only called from a single file, let's move it to lops.c where it is used, and mark it static. At the same time, we can clean up the meta_io.h header too. Signed-off-by: Steven Whitehouse --- fs/gfs2/lops.c | 18 ++++++++++++++++++ fs/gfs2/meta_io.c | 18 ------------------ fs/gfs2/meta_io.h | 26 +++++++++++--------------- 3 files changed, 29 insertions(+), 33 deletions(-) diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 17c5b5d7dc8..010b9fb9fec 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -579,6 +579,24 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, return error; } +/** + * gfs2_meta_sync - Sync all buffers associated with a glock + * @gl: The glock + * + */ + +static void gfs2_meta_sync(struct gfs2_glock *gl) +{ + struct address_space *mapping = gfs2_glock2aspace(gl); + int error; + + filemap_fdatawrite(mapping); + error = filemap_fdatawait(mapping); + + if (error) + gfs2_io_error(gl->gl_sbd); +} + static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) { struct gfs2_inode *ip = GFS2_I(jd->jd_inode); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 0da390686c0..93241505054 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -97,24 +97,6 @@ const struct address_space_operations gfs2_meta_aops = { .releasepage = gfs2_releasepage, }; -/** - * gfs2_meta_sync - Sync all buffers associated with a glock - * @gl: The glock - * - */ - -void gfs2_meta_sync(struct gfs2_glock *gl) -{ - struct address_space *mapping = gfs2_glock2aspace(gl); - int error; - - filemap_fdatawrite(mapping); - error = filemap_fdatawait(mapping); - - if (error) - gfs2_io_error(gl->gl_sbd); -} - /** * gfs2_getbuf - Get a buffer with a given address space * @gl: the glock diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index
0d4c843b6f8..4823b934208 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h @@ -48,21 +48,17 @@ static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) return inode->i_sb->s_fs_info; } -void gfs2_meta_sync(struct gfs2_glock *gl); - -struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); -int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, - int flags, struct buffer_head **bhp); -int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); -struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create); - -void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, - int meta); - -void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); - -int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, - struct buffer_head **bhp); +extern struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno); +extern int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, + struct buffer_head **bhp); +extern int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh); +extern struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, + int create); +extern void gfs2_remove_from_journal(struct buffer_head *bh, + struct gfs2_trans *tr, int meta); +extern void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, + struct buffer_head **bhp); static inline int gfs2_meta_inode_buffer(struct gfs2_inode *ip, struct buffer_head **bhp) -- cgit v1.2.3 From 7286b31eaba6404fa92f68d04626da1f395b3916 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 20 Aug 2013 09:35:09 +0100 Subject: GFS2: Take glock reference in examine_bucket() We need to check the glock ref counter in a race free way in order to ensure that the gfs2_glock_hold() call will succeed. 
The easiest way to do that is to simply take the reference count early in the common code of examine_bucket, skipping any glocks with zero ref count. That means that the examiner functions all need to put their reference on the glock once they've performed their function. Signed-off-by: Steven Whitehouse Reported-by: David Teigland Tested-by: David Teigland --- fs/gfs2/glock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 544a809819c..ce7078d5aa9 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1488,7 +1488,7 @@ static void examine_bucket(glock_examiner examiner, const struct gfs2_sbd *sdp, rcu_read_lock(); hlist_bl_for_each_entry_rcu(gl, pos, head, gl_list) { - if ((gl->gl_sbd == sdp) && atomic_read(&gl->gl_ref)) + if ((gl->gl_sbd == sdp) && atomic_inc_not_zero(&gl->gl_ref)) examiner(gl); } rcu_read_unlock(); @@ -1508,18 +1508,17 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) * thaw_glock - thaw out a glock which has an unprocessed reply waiting * @gl: The glock to thaw * - * N.B. When we freeze a glock, we leave a ref to the glock outstanding, - * so this has to result in the ref count being dropped by one. 
*/ static void thaw_glock(struct gfs2_glock *gl) { if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) - return; + goto out; set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - gfs2_glock_hold(gl); - if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) + if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) { +out: gfs2_glock_put(gl); + } } /** @@ -1536,7 +1535,6 @@ static void clear_glock(struct gfs2_glock *gl) if (gl->gl_state != LM_ST_UNLOCKED) handle_callback(gl, LM_ST_UNLOCKED, 0, false); spin_unlock(&gl->gl_spin); - gfs2_glock_hold(gl); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put(gl); } -- cgit v1.2.3 From 9d35814355e2baba9de523941e2ce52235359670 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 27 Aug 2013 21:22:07 +0100 Subject: GFS2: Merge ordered and writeback writepage The writepages function was recently merged between writeback and ordered mode. This completes the change by doing the same with writepage. The remaining differences in writepage were left over from some earlier time and not actually doing anything useful. 
Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index ee48ad37d9c..a9ea6f07774 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -122,14 +122,13 @@ out: } /** - * gfs2_writeback_writepage - Write page for writeback mappings + * gfs2_writepage - Write page for writeback mappings * @page: The page * @wbc: The writeback control * */ -static int gfs2_writeback_writepage(struct page *page, - struct writeback_control *wbc) +static int gfs2_writepage(struct page *page, struct writeback_control *wbc) { int ret; @@ -140,32 +139,6 @@ static int gfs2_writeback_writepage(struct page *page, return nobh_writepage(page, gfs2_get_block_noalloc, wbc); } -/** - * gfs2_ordered_writepage - Write page for ordered data files - * @page: The page to write - * @wbc: The writeback control - * - */ - -static int gfs2_ordered_writepage(struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct gfs2_inode *ip = GFS2_I(inode); - int ret; - - ret = gfs2_writepage_common(page, wbc); - if (ret <= 0) - return ret; - - if (!page_has_buffers(page)) { - create_empty_buffers(page, inode->i_sb->s_blocksize, - (1 << BH_Dirty)|(1 << BH_Uptodate)); - } - gfs2_page_add_databufs(ip, page, 0, inode->i_sb->s_blocksize-1); - return block_write_full_page(page, gfs2_get_block_noalloc, wbc); -} - /** * __gfs2_jdata_writepage - The core of jdata writepage * @page: The page to write @@ -1107,7 +1080,7 @@ cannot_release: } static const struct address_space_operations gfs2_writeback_aops = { - .writepage = gfs2_writeback_writepage, + .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, @@ -1123,7 +1096,7 @@ static const struct address_space_operations gfs2_writeback_aops = { }; static const struct address_space_operations gfs2_ordered_aops = { - .writepage = 
gfs2_ordered_writepage, + .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, .readpages = gfs2_readpages, -- cgit v1.2.3 From 068213f7d3378d3e33d0f1b9415b2fdc3e9efa14 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 25 Jul 2013 09:53:49 -0400 Subject: GFS2: Remove unnecessary memory barrier Function test_and_clear_bit implies a memory barrier, so subsequent memory barriers are unnecessary. Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/glock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ce7078d5aa9..722329cac98 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1411,7 +1411,6 @@ __acquires(&lru_lock) if (demote_ok(gl)) handle_callback(gl, LM_ST_UNLOCKED, 0, false); WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags)); - smp_mb__after_clear_bit(); if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) gfs2_glock_put_nolock(gl); spin_unlock(&gl->gl_spin); -- cgit v1.2.3 From 1d12d175ea3b56fdf2573bf6f168cce8f39b19e3 Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Wed, 4 Sep 2013 12:08:02 -0400 Subject: GFS2: Don't flag consistency error if first mounter is a spectator This patch checks for the first mounter being a spectator. If so, it makes sure all the journals are clean. If there's a dirty journal, the mount fails. Testing results: # insmod gfs2.ko # mount -tgfs2 -o spectator /dev/sasdrives/scratch /mnt/gfs2 mount: permission denied # dmesg | tail -2 [ 3390.655996] GFS2: fsid=MUSKETEER:home: Now mounting FS... [ 3390.841336] GFS2: fsid=MUSKETEER:home.s: jid=0: Journal is dirty, so the first mounter must not be a spectator.
# mount -tgfs2 /dev/sasdrives/scratch /mnt/gfs2 # umount /mnt/gfs2 # mount -tgfs2 -o spectator /dev/sasdrives/scratch /mnt/gfs2 # ls /mnt/gfs2|wc -l 352 # umount /mnt/gfs2 Signed-off-by: Bob Peterson Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_fstype.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 51 insertions(+), 2 deletions(-) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 0262c190b6f..19ff5e8c285 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -646,6 +646,48 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) return error; } +/** + * check_journal_clean - Make sure a journal is clean for a spectator mount + * @sdp: The GFS2 superblock + * @jd: The journal descriptor + * + * Returns: 0 if the journal is clean or locked, else an error + */ +static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) +{ + int error; + struct gfs2_holder j_gh; + struct gfs2_log_header_host head; + struct gfs2_inode *ip; + + ip = GFS2_I(jd->jd_inode); + error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP | + GL_EXACT | GL_NOCACHE, &j_gh); + if (error) { + fs_err(sdp, "Error locking journal for spectator mount.\n"); + return -EPERM; + } + error = gfs2_jdesc_check(jd); + if (error) { + fs_err(sdp, "Error checking journal for spectator mount.\n"); + goto out_unlock; + } + error = gfs2_find_jhead(jd, &head); + if (error) { + fs_err(sdp, "Error parsing journal for spectator mount.\n"); + goto out_unlock; + } + if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) { + error = -EPERM; + fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter " + "must not be a spectator.\n", jd->jd_jid); + } + +out_unlock: + gfs2_glock_dq_uninit(&j_gh); + return error; +} + static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = sdp->sd_master_dir->d_inode; @@ -732,8 +774,15 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) if (sdp->sd_lockstruct.ls_first) 
{ unsigned int x; for (x = 0; x < sdp->sd_journals; x++) { - error = gfs2_recover_journal(gfs2_jdesc_find(sdp, x), - true); + struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x); + + if (sdp->sd_args.ar_spectator) { + error = check_journal_clean(sdp, jd); + if (error) + goto fail_jinode_gh; + continue; + } + error = gfs2_recover_journal(jd, true); if (error) { fs_err(sdp, "error recovering journal %u: %d\n", x, error); -- cgit v1.2.3 From 0c9018097fe2966d80fe39e5c9ca94bb436ec369 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 3 Sep 2013 16:59:42 -0500 Subject: GFS2: dirty inode correctly in gfs2_write_end GFS2 was only setting I_DIRTY_DATASYNC on files that it wrote to, when it actually increased the file size. If gfs2_fsync was called without I_DIRTY_DATASYNC set, it didn't flush the incore data to the log before returning, so any metadata or journaled data changes were not getting fsynced. This meant that writes to the middle of files were not always getting fsynced properly. This patch makes gfs2 set I_DIRTY_DATASYNC whenever metadata has been updated during a write. It also makes gfs2_fsync flush the incore log if I_DIRTY_PAGES is set, and the file is using data journalling. This will make sure that all incore logged data gets written to disk before returning from a fsync.
Signed-off-by: Benjamin Marzinski Signed-off-by: Steven Whitehouse --- fs/gfs2/aops.c | 9 +++++++-- fs/gfs2/file.c | 4 +++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index a9ea6f07774..1f7d8057ea6 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -815,6 +815,8 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; + struct gfs2_trans *tr = current->journal_info; + BUG_ON(!tr); BUG_ON(gfs2_glock_is_locked_by_me(ip->i_gl) == NULL); @@ -825,8 +827,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, goto failed; } - gfs2_trans_add_meta(ip->i_gl, dibh); - if (gfs2_is_stuffed(ip)) return gfs2_stuffed_write_end(inode, dibh, pos, len, copied, page); @@ -834,6 +834,11 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); + if (inode == sdp->sd_rindex) { adjust_fs_space(inode); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 72c3866a732..0621b46d474 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -650,7 +650,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); + int sync_state = inode->i_state & I_DIRTY; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; @@ -660,6 +660,8 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, return ret1; } + if (!gfs2_is_jdata(ip)) + sync_state &= ~I_DIRTY_PAGES; if (datasync) sync_state &= ~I_DIRTY_SYNC; -- cgit v1.2.3