From a99ebf43f49f4499ab0e2a8a9132ad6ed6ba2409 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 1 Dec 2011 11:24:20 +0000 Subject: xfs: fix allocation length overflow in xfs_bmapi_write() When testing the new xfstests --large-fs option that does very large file preallocations, this assert was tripped deep in xfs_alloc_vextent(): XFS: Assertion failed: args->minlen <= args->maxlen, file: fs/xfs/xfs_alloc.c, line: 2239 The allocation was trying to allocate a zero length extent because the lower 32 bits of the allocation length was zero. The remaining length of the allocation to be done was an exact multiple of 2^32 - the first case I saw was at 496TB remaining to be allocated. This turns out to be an overflow when converting the allocation length (a 64 bit quantity) into the extent length to allocate (a 32 bit quantity), and it requires the length to be allocated an exact multiple of 2^32 blocks to trip the assert. Fix it by limiting the extent lenth to allocate to MAXEXTLEN. Signed-off-by: Dave Chinner Signed-off-by: Ben Myers Reviewed-by: Christoph Hellwig --- fs/xfs/xfs_bmap.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index c68baeb0974..d0ab7883705 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -2383,6 +2383,8 @@ xfs_bmap_btalloc( int tryagain; int error; + ASSERT(ap->length); + mp = ap->ip->i_mount; align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0; if (unlikely(align)) { @@ -4629,6 +4631,8 @@ xfs_bmapi_allocate( int error; int rt; + ASSERT(bma->length > 0); + rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip); /* @@ -4849,6 +4853,7 @@ xfs_bmapi_write( ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(!(flags & XFS_BMAPI_IGSTATE)); ASSERT(tp != NULL); + ASSERT(len > 0); whichfork = (flags & XFS_BMAPI_ATTRFORK) ? XFS_ATTR_FORK : XFS_DATA_FORK; @@ -4918,9 +4923,22 @@ xfs_bmapi_write( bma.eof = eof; bma.conv = !!(flags & XFS_BMAPI_CONVERT); bma.wasdel = wasdelay; - bma.length = len; bma.offset = bno; + /* + * There's a 32/64 bit type mismatch between the + * allocation length request (which can be 64 bits in + * length) and the bma length request, which is + * xfs_extlen_t and therefore 32 bits. Hence we have to + * check for 32-bit overflows and handle them here. + */ + if (len > (xfs_filblks_t)MAXEXTLEN) + bma.length = MAXEXTLEN; + else + bma.length = len; + + ASSERT(len > 0); + ASSERT(bma.length > 0); error = xfs_bmapi_allocate(&bma, flags); if (error) goto error0; -- cgit v1.2.3 From c29f7d457ac63311feb11928a866efd2fe153d74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 30 Nov 2011 08:58:18 +0000 Subject: xfs: fix nfs export of 64-bit inodes numbers on 32-bit kernels The i_ino field in the VFS inode is of type unsigned long and thus can't hold the full 64-bit inode number on 32-bit kernels. We have the full inode number in the XFS inode, so use that one for nfs exports. Note that I've also switched the 32-bit file handles types to it, just to make the code more consistent and copy & paste errors less likely to happen. Reported-by: Guoquan Yang Reported-by: Hank Peng Signed-off-by: Christoph Hellwig Signed-off-by: Ben Myers --- fs/xfs/xfs_export.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index da108977b21..558910f5e3c 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -98,22 +98,22 @@ xfs_fs_encode_fh( switch (fileid_type) { case FILEID_INO32_GEN_PARENT: spin_lock(&dentry->d_lock); - fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; + fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; spin_unlock(&dentry->d_lock); /*FALLTHRU*/ case FILEID_INO32_GEN: - fid->i32.ino = inode->i_ino; + fid->i32.ino = XFS_I(inode)->i_ino; fid->i32.gen = inode->i_generation; break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: spin_lock(&dentry->d_lock); - fid64->parent_ino = dentry->d_parent->d_inode->i_ino; + fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; fid64->parent_gen = dentry->d_parent->d_inode->i_generation; spin_unlock(&dentry->d_lock); /*FALLTHRU*/ case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - fid64->ino = inode->i_ino; + fid64->ino = XFS_I(inode)->i_ino; fid64->gen = inode->i_generation; break; } -- cgit v1.2.3 From 9f9c19ec1a59422c7687b11847ed3408128aa0d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 28 Nov 2011 08:17:36 +0000 Subject: xfs: fix the logspace waiting algorithm Apply the scheme used in log_regrant_write_log_space to wake up any other threads waiting for log space before the newly added one to log_regrant_write_log_space as well, and factor the code into readable helpers. For each of the queues we have add two helpers: - one to try to wake up all waiting threads. This helper will also be usable by xfs_log_move_tail once we remove the current opportunistic wakeups in it. - one to sleep on t_wait until enough log space is available, loosely modelled after Linux waitqueues. And use them to reimplement the guts of log_regrant_write_log_space and log_regrant_write_log_space. These two function now use one and the same algorithm for waiting on log space instead of subtly different ones before, with an option to completely unify them in the near future. Also move the filesystem shutdown handling to the common caller given that we had to touch it anyway. Based on hard debugging and an earlier patch from Chandra Seetharaman . Signed-off-by: Christoph Hellwig Reviewed-by: Chandra Seetharaman Tested-by: Chandra Seetharaman Signed-off-by: Ben Myers --- fs/xfs/xfs_log.c | 348 ++++++++++++++++++++++++++--------------------------- fs/xfs/xfs_trace.h | 12 +- 2 files changed, 177 insertions(+), 183 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a14cd89fe46..34817adf4b9 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -150,6 +150,117 @@ xlog_grant_add_space( } while (head_val != old); } +STATIC bool +xlog_reserveq_wake( + struct log *log, + int *free_bytes) +{ + struct xlog_ticket *tic; + int need_bytes; + + list_for_each_entry(tic, &log->l_reserveq, t_queue) { + if (tic->t_flags & XLOG_TIC_PERM_RESERV) + need_bytes = tic->t_unit_res * tic->t_cnt; + else + need_bytes = tic->t_unit_res; + + if (*free_bytes < need_bytes) + return false; + *free_bytes -= need_bytes; + + trace_xfs_log_grant_wake_up(log, tic); + wake_up(&tic->t_wait); + } + + return true; +} + +STATIC bool +xlog_writeq_wake( + struct log *log, + int *free_bytes) +{ + struct xlog_ticket *tic; + int need_bytes; + + list_for_each_entry(tic, &log->l_writeq, t_queue) { + ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); + + need_bytes = tic->t_unit_res; + + if (*free_bytes < need_bytes) + return false; + *free_bytes -= need_bytes; + + trace_xfs_log_regrant_write_wake_up(log, tic); + wake_up(&tic->t_wait); + } + + return true; +} + +STATIC int +xlog_reserveq_wait( + struct log *log, + struct xlog_ticket *tic, + int need_bytes) +{ + list_add_tail(&tic->t_queue, &log->l_reserveq); + + do { + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + xlog_grant_push_ail(log, need_bytes); + + XFS_STATS_INC(xs_sleep_logspace); + trace_xfs_log_grant_sleep(log, tic); + + xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); + trace_xfs_log_grant_wake(log, tic); + + spin_lock(&log->l_grant_reserve_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); + + list_del_init(&tic->t_queue); + return 0; +shutdown: + list_del_init(&tic->t_queue); + return XFS_ERROR(EIO); +} + +STATIC int +xlog_writeq_wait( + struct log *log, + struct xlog_ticket *tic, + int need_bytes) +{ + list_add_tail(&tic->t_queue, &log->l_writeq); + + do { + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + xlog_grant_push_ail(log, need_bytes); + + XFS_STATS_INC(xs_sleep_logspace); + trace_xfs_log_regrant_write_sleep(log, tic); + + xlog_wait(&tic->t_wait, &log->l_grant_write_lock); + trace_xfs_log_regrant_write_wake(log, tic); + + spin_lock(&log->l_grant_write_lock); + if (XLOG_FORCED_SHUTDOWN(log)) + goto shutdown; + } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); + + list_del_init(&tic->t_queue); + return 0; +shutdown: + list_del_init(&tic->t_queue); + return XFS_ERROR(EIO); +} + static void xlog_tic_reset_res(xlog_ticket_t *tic) { @@ -350,8 +461,19 @@ xfs_log_reserve( retval = xlog_grant_log_space(log, internal_ticket); } + if (unlikely(retval)) { + /* + * If we are failing, make sure the ticket doesn't have any + * current reservations. We don't want to add this back + * when the ticket/ transaction gets cancelled. + */ + internal_ticket->t_curr_res = 0; + /* ungrant will give back unit_res * t_cnt. */ + internal_ticket->t_cnt = 0; + } + return retval; -} /* xfs_log_reserve */ +} /* @@ -2481,8 +2603,8 @@ restart: /* * Atomically get the log space required for a log ticket. * - * Once a ticket gets put onto the reserveq, it will only return after - * the needed reservation is satisfied. + * Once a ticket gets put onto the reserveq, it will only return after the + * needed reservation is satisfied. * * This function is structured so that it has a lock free fast path. This is * necessary because every new transaction reservation will come through this @@ -2490,113 +2612,53 @@ restart: * every pass. * * As tickets are only ever moved on and off the reserveq under the - * l_grant_reserve_lock, we only need to take that lock if we are going - * to add the ticket to the queue and sleep. We can avoid taking the lock if the - * ticket was never added to the reserveq because the t_queue list head will be - * empty and we hold the only reference to it so it can safely be checked - * unlocked. + * l_grant_reserve_lock, we only need to take that lock if we are going to add + * the ticket to the queue and sleep. We can avoid taking the lock if the ticket + * was never added to the reserveq because the t_queue list head will be empty + * and we hold the only reference to it so it can safely be checked unlocked. */ STATIC int -xlog_grant_log_space(xlog_t *log, - xlog_ticket_t *tic) +xlog_grant_log_space( + struct log *log, + struct xlog_ticket *tic) { - int free_bytes; - int need_bytes; + int free_bytes, need_bytes; + int error = 0; -#ifdef DEBUG - if (log->l_flags & XLOG_ACTIVE_RECOVERY) - panic("grant Recovery problem"); -#endif + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); trace_xfs_log_grant_enter(log, tic); + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. + */ need_bytes = tic->t_unit_res; if (tic->t_flags & XFS_LOG_PERM_RESERV) need_bytes *= tic->t_ocnt; - - /* something is already sleeping; insert new transaction at end */ - if (!list_empty_careful(&log->l_reserveq)) { - spin_lock(&log->l_grant_reserve_lock); - /* recheck the queue now we are locked */ - if (list_empty(&log->l_reserveq)) { - spin_unlock(&log->l_grant_reserve_lock); - goto redo; - } - list_add_tail(&tic->t_queue, &log->l_reserveq); - - trace_xfs_log_grant_sleep1(log, tic); - - /* - * Gotta check this before going to sleep, while we're - * holding the grant lock. - */ - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); - - /* - * If we got an error, and the filesystem is shutting down, - * we'll catch it down below. So just continue... - */ - trace_xfs_log_grant_wake1(log, tic); - } - -redo: - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - if (free_bytes < need_bytes) { + if (!list_empty_careful(&log->l_reserveq)) { spin_lock(&log->l_grant_reserve_lock); - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_reserveq); - - trace_xfs_log_grant_sleep2(log, tic); - - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); - - trace_xfs_log_grant_wake2(log, tic); - goto redo; - } - - if (!list_empty(&tic->t_queue)) { + if (!xlog_reserveq_wake(log, &free_bytes) || + free_bytes < need_bytes) + error = xlog_reserveq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_reserve_lock); + } else if (free_bytes < need_bytes) { spin_lock(&log->l_grant_reserve_lock); - list_del_init(&tic->t_queue); + error = xlog_reserveq_wait(log, tic, need_bytes); spin_unlock(&log->l_grant_reserve_lock); } + if (error) + return error; - /* we've got enough space */ xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_grant_exit(log, tic); xlog_verify_grant_tail(log); return 0; - -error_return_unlocked: - spin_lock(&log->l_grant_reserve_lock); -error_return: - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_reserve_lock); - trace_xfs_log_grant_error(log, tic); - - /* - * If we are failing, make sure the ticket doesn't have any - * current reservations. We don't want to add this back when - * the ticket/transaction gets cancelled. - */ - tic->t_curr_res = 0; - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - return XFS_ERROR(EIO); -} /* xlog_grant_log_space */ - +} /* * Replenish the byte reservation required by moving the grant write head. @@ -2605,10 +2667,12 @@ error_return: * free fast path. */ STATIC int -xlog_regrant_write_log_space(xlog_t *log, - xlog_ticket_t *tic) +xlog_regrant_write_log_space( + struct log *log, + struct xlog_ticket *tic) { - int free_bytes, need_bytes; + int free_bytes, need_bytes; + int error = 0; tic->t_curr_res = tic->t_unit_res; xlog_tic_reset_res(tic); @@ -2616,104 +2680,38 @@ xlog_regrant_write_log_space(xlog_t *log, if (tic->t_cnt > 0) return 0; -#ifdef DEBUG - if (log->l_flags & XLOG_ACTIVE_RECOVERY) - panic("regrant Recovery problem"); -#endif + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); trace_xfs_log_regrant_write_enter(log, tic); - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; - /* If there are other waiters on the queue then give them a - * chance at logspace before us. Wake up the first waiters, - * if we do not wake up all the waiters then go to sleep waiting - * for more free space, otherwise try to get some space for - * this transaction. + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. */ need_bytes = tic->t_unit_res; - if (!list_empty_careful(&log->l_writeq)) { - struct xlog_ticket *ntic; - - spin_lock(&log->l_grant_write_lock); - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - list_for_each_entry(ntic, &log->l_writeq, t_queue) { - ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV); - - if (free_bytes < ntic->t_unit_res) - break; - free_bytes -= ntic->t_unit_res; - wake_up(&ntic->t_wait); - } - - if (ntic != list_first_entry(&log->l_writeq, - struct xlog_ticket, t_queue)) { - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_writeq); - trace_xfs_log_regrant_write_sleep1(log, tic); - - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); - trace_xfs_log_regrant_write_wake1(log, tic); - } else - spin_unlock(&log->l_grant_write_lock); - } - -redo: - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return_unlocked; - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - if (free_bytes < need_bytes) { + if (!list_empty_careful(&log->l_writeq)) { spin_lock(&log->l_grant_write_lock); - if (list_empty(&tic->t_queue)) - list_add_tail(&tic->t_queue, &log->l_writeq); - - if (XLOG_FORCED_SHUTDOWN(log)) - goto error_return; - - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - trace_xfs_log_regrant_write_sleep2(log, tic); - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); - - trace_xfs_log_regrant_write_wake2(log, tic); - goto redo; - } - - if (!list_empty(&tic->t_queue)) { + if (!xlog_writeq_wake(log, &free_bytes) || + free_bytes < need_bytes) + error = xlog_writeq_wait(log, tic, need_bytes); + spin_unlock(&log->l_grant_write_lock); + } else if (free_bytes < need_bytes) { spin_lock(&log->l_grant_write_lock); - list_del_init(&tic->t_queue); + error = xlog_writeq_wait(log, tic, need_bytes); spin_unlock(&log->l_grant_write_lock); } - /* we've got enough space */ + if (error) + return error; + xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); trace_xfs_log_regrant_write_exit(log, tic); xlog_verify_grant_tail(log); return 0; - - - error_return_unlocked: - spin_lock(&log->l_grant_write_lock); - error_return: - list_del_init(&tic->t_queue); - spin_unlock(&log->l_grant_write_lock); - trace_xfs_log_regrant_write_error(log, tic); - - /* - * If we are failing, make sure the ticket doesn't have any - * current reservations. We don't want to add this back when - * the ticket/transaction gets cancelled. - */ - tic->t_curr_res = 0; - tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ - return XFS_ERROR(EIO); -} /* xlog_regrant_write_log_space */ - +} /* The first cnt-1 times through here we don't need to * move the grant write head because the permanent diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f1d2802b2f0..49403579887 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -834,18 +834,14 @@ DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake); DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); -- cgit v1.2.3