From b6d2d39a05f2c6acb741b8edcab160be85b0b6bb Mon Sep 17 00:00:00 2001 From: Steve Cornelius Date: Mon, 15 Jun 2015 16:52:59 -0700 Subject: crypto: caam - fix RNG buffer cache alignment commit 412c98c1bef65fe7589f1300e93735d96130307c upstream. The hwrng output buffers (2) are cast inside of a a struct (caam_rng_ctx) allocated in one DMA-tagged region. While the kernel's heap allocator should place the overall struct on a cacheline aligned boundary, the 2 buffers contained within may not necessarily align. Consenquently, the ends of unaligned buffers may not fully flush, and if so, stale data will be left behind, resulting in small repeating patterns. This fix aligns the buffers inside the struct. Note that not all of the data inside caam_rng_ctx necessarily needs to be DMA-tagged, only the buffers themselves require this. However, a fix would incur the expense of error-handling bloat in the case of allocation failure. Signed-off-by: Steve Cornelius Signed-off-by: Victoria Milhoan Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- drivers/crypto/caam/caamrng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index d1939a9539c0..04aefffb4dd9 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -56,7 +56,7 @@ /* Buffer, its dma address and lock */ struct buf_data { - u8 buf[RN_BUF_SIZE]; + u8 buf[RN_BUF_SIZE] ____cacheline_aligned; dma_addr_t addr; struct completion filled; u32 hw_desc[DESC_JOB_O_LEN]; -- cgit v1.2.3 From 63dec3118397e4a94b92d2c395b4310ecb449581 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 15 Jun 2015 17:50:25 -0400 Subject: tracing: Have filter check for balanced ops commit 2cf30dc180cea808077f003c5116388183e54f9e upstream. When the following filter is used it causes a warning to trigger: # cd /sys/kernel/debug/tracing # echo "((dev==1)blocks==2)" > events/ext4/ext4_truncate_exit/filter -bash: echo: write error: Invalid argument # cat events/ext4/ext4_truncate_exit/filter ((dev==1)blocks==2) ^ parse_error: No error ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1223 at kernel/trace/trace_events_filter.c:1640 replace_preds+0x3c5/0x990() Modules linked in: bnep lockd grace bluetooth ... CPU: 3 PID: 1223 Comm: bash Tainted: G W 4.1.0-rc3-test+ #450 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 0000000000000668 ffff8800c106bc98 ffffffff816ed4f9 ffff88011ead0cf0 0000000000000000 ffff8800c106bcd8 ffffffff8107fb07 ffffffff8136b46c ffff8800c7d81d48 ffff8800d4c2bc00 ffff8800d4d4f920 00000000ffffffea Call Trace: [] dump_stack+0x4c/0x6e [] warn_slowpath_common+0x97/0xe0 [] ? _kstrtoull+0x2c/0x80 [] warn_slowpath_null+0x1a/0x20 [] replace_preds+0x3c5/0x990 [] create_filter+0x82/0xb0 [] apply_event_filter+0xd4/0x180 [] event_filter_write+0x8f/0x120 [] __vfs_write+0x28/0xe0 [] ? __sb_start_write+0x53/0xf0 [] ? security_file_permission+0x30/0xc0 [] vfs_write+0xb8/0x1b0 [] SyS_write+0x4f/0xb0 [] system_call_fastpath+0x12/0x6a ---[ end trace e11028bd95818dcd ]--- Worse yet, reading the error message (the filter again) it says that there was no error, when there clearly was. The issue is that the code that checks the input does not check for balanced ops. That is, having an op between a closed parenthesis and the next token. This would only cause a warning, and fail out before doing any real harm, but it should still not caues a warning, and the error reported should work: # cd /sys/kernel/debug/tracing # echo "((dev==1)blocks==2)" > events/ext4/ext4_truncate_exit/filter -bash: echo: write error: Invalid argument # cat events/ext4/ext4_truncate_exit/filter ((dev==1)blocks==2) ^ parse_error: Meaningless filter expression And give no kernel warning. Link: http://lkml.kernel.org/r/20150615175025.7e809215@gandalf.local.home Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Steven Rostedt [ luis: backported to 3.16: - unconditionally decrement cnt as the OP_NOT logic was introduced only by e12c09cf3087 ("tracing: Add NOT to filtering logic") ] Signed-off-by: Luis Henriques Signed-off-by: Greg Kroah-Hartman --- kernel/trace/trace_events_filter.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 0a1edc694d67..fe3e086d38e9 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1328,19 +1328,24 @@ static int check_preds(struct filter_parse_state *ps) { int n_normal_preds = 0, n_logical_preds = 0; struct postfix_elt *elt; + int cnt = 0; list_for_each_entry(elt, &ps->postfix, list) { - if (elt->op == OP_NONE) + if (elt->op == OP_NONE) { + cnt++; continue; + } + cnt--; if (elt->op == OP_AND || elt->op == OP_OR) { n_logical_preds++; continue; } n_normal_preds++; + WARN_ON_ONCE(cnt < 0); } - if (!n_normal_preds || n_logical_preds >= n_normal_preds) { + if (cnt != 1 || !n_normal_preds || n_logical_preds >= n_normal_preds) { parse_error(ps, FILT_ERR_INVALID_FILTER, 0); return -EINVAL; } -- cgit v1.2.3 From 97d905e8568ff57c36c3e89b23b1c0f9f5146f96 Mon Sep 17 00:00:00 2001 From: Adam Jackson Date: Mon, 15 Jun 2015 16:16:15 -0400 Subject: drm/mgag200: Reject non-character-cell-aligned mode widths commit 25161084b1c1b0c29948f6f77266a35f302196b7 upstream. Turns out 1366x768 does not in fact work on this hardware. Signed-off-by: Adam Jackson Signed-off-by: Dave Airlie Signed-off-by: Greg Kroah-Hartman --- drivers/gpu/drm/mgag200/mgag200_mode.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index f6341e8622ee..7bd2acce9f81 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1487,6 +1487,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector, return MODE_BANDWIDTH; } + if ((mode->hdisplay % 8) != 0 || (mode->hsync_start % 8) != 0 || + (mode->hsync_end % 8) != 0 || (mode->htotal % 8) != 0) { + return MODE_H_ILLEGAL; + } + if (mode->crtc_hdisplay > 2048 || mode->crtc_hsync_start > 4096 || mode->crtc_hsync_end > 4096 || mode->crtc_htotal > 4096 || mode->crtc_vdisplay > 2048 || mode->crtc_vsync_start > 4096 || -- cgit v1.2.3 From 14f81062f365fa9e3839bb2a16862217b71a553c Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 16 Jun 2015 22:11:06 +0100 Subject: pipe: iovec: Fix memory corruption when retrying atomic copy as non-atomic pipe_iov_copy_{from,to}_user() may be tried twice with the same iovec, the first time atomically and the second time not. The second attempt needs to continue from the iovec position, pipe buffer offset and remaining length where the first attempt failed, but currently the pipe buffer offset and remaining length are reset. This will corrupt the piped data (possibly also leading to an information leak between processes) and may also corrupt kernel memory. This was fixed upstream by commits f0d1bec9d58d ("new helper: copy_page_from_iter()") and 637b58c2887e ("switch pipe_read() to copy_page_to_iter()"), but those aren't suitable for stable. This fix for older kernel versions was made by Seth Jennings for RHEL and I have extracted it from their update. CVE-2015-1805 References: https://bugzilla.redhat.com/show_bug.cgi?id=1202855 Signed-off-by: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- fs/pipe.c | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/fs/pipe.c b/fs/pipe.c index 0e0752ef2715..3e7ab278bb0c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -117,25 +117,27 @@ void pipe_wait(struct pipe_inode_info *pipe) } static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, - int atomic) +pipe_iov_copy_from_user(void *addr, int *offset, struct iovec *iov, + size_t *remaining, int atomic) { unsigned long copy; - while (len > 0) { + while (*remaining > 0) { while (!iov->iov_len) iov++; - copy = min_t(unsigned long, len, iov->iov_len); + copy = min_t(unsigned long, *remaining, iov->iov_len); if (atomic) { - if (__copy_from_user_inatomic(to, iov->iov_base, copy)) + if (__copy_from_user_inatomic(addr + *offset, + iov->iov_base, copy)) return -EFAULT; } else { - if (copy_from_user(to, iov->iov_base, copy)) + if (copy_from_user(addr + *offset, + iov->iov_base, copy)) return -EFAULT; } - to += copy; - len -= copy; + *offset += copy; + *remaining -= copy; iov->iov_base += copy; iov->iov_len -= copy; } @@ -143,25 +145,27 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, } static int -pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, - int atomic) +pipe_iov_copy_to_user(struct iovec *iov, void *addr, int *offset, + size_t *remaining, int atomic) { unsigned long copy; - while (len > 0) { + while (*remaining > 0) { while (!iov->iov_len) iov++; - copy = min_t(unsigned long, len, iov->iov_len); + copy = min_t(unsigned long, *remaining, iov->iov_len); if (atomic) { - if (__copy_to_user_inatomic(iov->iov_base, from, copy)) + if (__copy_to_user_inatomic(iov->iov_base, + addr + *offset, copy)) return -EFAULT; } else { - if (copy_to_user(iov->iov_base, from, copy)) + if (copy_to_user(iov->iov_base, + addr + *offset, copy)) return -EFAULT; } - from += copy; - len -= copy; + *offset += copy; + *remaining -= copy; iov->iov_base += copy; iov->iov_len -= copy; } @@ -395,7 +399,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, struct pipe_buffer *buf = pipe->bufs + curbuf; const struct pipe_buf_operations *ops = buf->ops; void *addr; - size_t chars = buf->len; + size_t chars = buf->len, remaining; int error, atomic; if (chars > total_len) @@ -409,9 +413,11 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, } atomic = !iov_fault_in_pages_write(iov, chars); + remaining = chars; redo: addr = ops->map(pipe, buf, atomic); - error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); + error = pipe_iov_copy_to_user(iov, addr, &buf->offset, + &remaining, atomic); ops->unmap(pipe, buf, addr); if (unlikely(error)) { /* @@ -426,7 +432,6 @@ redo: break; } ret += chars; - buf->offset += chars; buf->len -= chars; /* Was it a packet buffer? Clean up and exit */ @@ -531,6 +536,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, if (ops->can_merge && offset + chars <= PAGE_SIZE) { int error, atomic = 1; void *addr; + size_t remaining = chars; error = ops->confirm(pipe, buf); if (error) @@ -539,8 +545,8 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, iov_fault_in_pages_read(iov, chars); redo1: addr = ops->map(pipe, buf, atomic); - error = pipe_iov_copy_from_user(offset + addr, iov, - chars, atomic); + error = pipe_iov_copy_from_user(addr, &offset, iov, + &remaining, atomic); ops->unmap(pipe, buf, addr); ret = error; do_wakeup = 1; @@ -575,6 +581,8 @@ redo1: struct page *page = pipe->tmp_page; char *src; int error, atomic = 1; + int offset = 0; + size_t remaining; if (!page) { page = alloc_page(GFP_HIGHUSER); @@ -595,14 +603,15 @@ redo1: chars = total_len; iov_fault_in_pages_read(iov, chars); + remaining = chars; redo2: if (atomic) src = kmap_atomic(page); else src = kmap(page); - error = pipe_iov_copy_from_user(src, iov, chars, - atomic); + error = pipe_iov_copy_from_user(src, &offset, iov, + &remaining, atomic); if (atomic) kunmap_atomic(src); else -- cgit v1.2.3 From 17c06c69128f310cac51858455b1483a30c875ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 7 May 2014 17:16:46 -0400 Subject: lpfc: Add iotag memory barrier commit 27f344eb15dd0da80ebec80c7245e8c85043f841 upstream. Add a memory barrier to ensure the valid bit is read before any of the cqe payload is read. This fixes an issue seen on Power where the cqe payload was getting loaded before the valid bit. When this occurred, we saw an iotag out of range error when a command completed, but since the iotag looked invalid the command didn't get completed to scsi core. Later we hit the command timeout, attempted to abort the command, then waited for the aborted command to get returned. Since the adapter already returned the command, we timeout waiting, and end up escalating EEH all the way to host reset. This patch fixes this issue. Signed-off-by: Brian King Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/lpfc/lpfc_sli.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 572579f87de4..90861416b9e9 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -263,6 +263,16 @@ lpfc_sli4_eq_get(struct lpfc_queue *q) return NULL; q->hba_index = idx; + + /* + * insert barrier for instruction interlock : data from the hardware + * must have the valid bit checked before it can be copied and acted + * upon. Given what was seen in lpfc_sli4_cq_get() of speculative + * instructions allowing action on content before valid bit checked, + * add barrier here as well. May not be needed as "content" is a + * single 32-bit entity here (vs multi word structure for cq's). + */ + mb(); return eqe; } @@ -368,6 +378,17 @@ lpfc_sli4_cq_get(struct lpfc_queue *q) cqe = q->qe[q->hba_index].cqe; q->hba_index = idx; + + /* + * insert barrier for instruction interlock : data from the hardware + * must have the valid bit checked before it can be copied and acted + * upon. Speculative instructions were allowing a bcopy at the start + * of lpfc_sli4_fp_handle_wcqe(), which is called immediately + * after our return, to copy data before the valid bit check above + * was done. As such, some of the copied data was stale. The barrier + * ensures the check is before any data is copied. + */ + mb(); return cqe; } -- cgit v1.2.3 From b3d78448444995411ba87af2237391bf68fc358d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Jun 2015 12:08:45 -0700 Subject: Linux 3.10.82 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 6d19e37d36d5..5e3e665a10b7 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 81 +SUBLEVEL = 82 EXTRAVERSION = NAME = TOSSUG Baby Fish -- cgit v1.2.3 From 2a7abb5a0b35ca8f7c9f8113322fbff87b5d6667 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 8 Jul 2013 14:24:16 -0700 Subject: fput: turn "list_head delayed_fput_list" into llist_head commit 4f5e65a1cc90bbb15b9f6cdc362922af1bcc155a upstream. fput() and delayed_fput() can use llist and avoid the locking. This is unlikely path, it is not that this change can improve the performance, but this way the code looks simpler. Signed-off-by: Oleg Nesterov Suggested-by: Andrew Morton Cc: Al Viro Cc: Andrey Vagin Cc: "Eric W. Biederman" Cc: David Howells Cc: Huang Ying Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Al Viro Signed-off-by: Wang Kai Signed-off-by: Greg Kroah-Hartman --- fs/file_table.c | 25 ++++++++++--------------- include/linux/fs.h | 2 ++ 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index 54a34be444f9..cd4ef34bb641 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -265,18 +265,15 @@ static void __fput(struct file *file) mntput(mnt); } -static DEFINE_SPINLOCK(delayed_fput_lock); -static LIST_HEAD(delayed_fput_list); +static LLIST_HEAD(delayed_fput_list); static void delayed_fput(struct work_struct *unused) { - LIST_HEAD(head); - spin_lock_irq(&delayed_fput_lock); - list_splice_init(&delayed_fput_list, &head); - spin_unlock_irq(&delayed_fput_lock); - while (!list_empty(&head)) { - struct file *f = list_first_entry(&head, struct file, f_u.fu_list); - list_del_init(&f->f_u.fu_list); - __fput(f); + struct llist_node *node = llist_del_all(&delayed_fput_list); + struct llist_node *next; + + for (; node; node = next) { + next = llist_next(node); + __fput(llist_entry(node, struct file, f_u.fu_llist)); } } @@ -306,7 +303,6 @@ void fput(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - unsigned long flags; file_sb_list_del(file); if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { @@ -314,10 +310,9 @@ void fput(struct file *file) if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) return; } - spin_lock_irqsave(&delayed_fput_lock, flags); - list_add(&file->f_u.fu_list, &delayed_fput_list); - schedule_work(&delayed_fput_work); - spin_unlock_irqrestore(&delayed_fput_lock, flags); + + if (llist_add(&file->f_u.fu_llist, &delayed_fput_list)) + schedule_work(&delayed_fput_work); } } diff --git a/include/linux/fs.h b/include/linux/fs.h index d57bc5df7225..f45857a7ac49 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -767,6 +768,7 @@ struct file { */ union { struct list_head fu_list; + struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; -- cgit v1.2.3 From 68c8a7ae3030f73631a98d4d9ee16e31cba0c1b5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 Oct 2013 11:06:42 -0400 Subject: get rid of s_files and files_lock commit eee5cc2702929fd41cce28058dc6d6717f723f87 upstream. The only thing we need it for is alt-sysrq-r (emergency remount r/o) and these days we can do just as well without going through the list of files. Signed-off-by: Al Viro [wangkai: backport to 3.10: adjust context] Signed-off-by: Wang Kai Signed-off-by: Greg Kroah-Hartman --- fs/file_table.c | 123 ----------------------------------------------------- fs/internal.h | 3 -- fs/open.c | 2 - fs/super.c | 23 +--------- include/linux/fs.h | 13 ------ 5 files changed, 2 insertions(+), 162 deletions(-) diff --git a/fs/file_table.c b/fs/file_table.c index cd4ef34bb641..28f02a7cbba1 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -36,8 +36,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DEFINE_STATIC_LGLOCK(files_lglock); - /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -134,7 +132,6 @@ struct file *get_empty_filp(void) return ERR_PTR(error); } - INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); @@ -304,7 +301,6 @@ void fput(struct file *file) if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - file_sb_list_del(file); if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { init_task_work(&file->f_u.fu_rcuhead, ____fput); if (!task_work_add(task, &file->f_u.fu_rcuhead, true)) @@ -328,7 +324,6 @@ void __fput_sync(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { struct task_struct *task = current; - file_sb_list_del(file); BUG_ON(!(task->flags & PF_KTHREAD)); __fput(file); } @@ -340,127 +335,10 @@ void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_sb_list_del(file); file_free(file); } } -static inline int file_list_cpu(struct file *file) -{ -#ifdef CONFIG_SMP - return file->f_sb_list_cpu; -#else - return smp_processor_id(); -#endif -} - -/* helper for file_sb_list_add to reduce ifdefs */ -static inline void __file_sb_list_add(struct file *file, struct super_block *sb) -{ - struct list_head *list; -#ifdef CONFIG_SMP - int cpu; - cpu = smp_processor_id(); - file->f_sb_list_cpu = cpu; - list = per_cpu_ptr(sb->s_files, cpu); -#else - list = &sb->s_files; -#endif - list_add(&file->f_u.fu_list, list); -} - -/** - * file_sb_list_add - add a file to the sb's file list - * @file: file to add - * @sb: sb to add it to - * - * Use this function to associate a file with the superblock of the inode it - * refers to. - */ -void file_sb_list_add(struct file *file, struct super_block *sb) -{ - lg_local_lock(&files_lglock); - __file_sb_list_add(file, sb); - lg_local_unlock(&files_lglock); -} - -/** - * file_sb_list_del - remove a file from the sb's file list - * @file: file to remove - * @sb: sb to remove it from - * - * Use this function to remove a file from its superblock. - */ -void file_sb_list_del(struct file *file) -{ - if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); - list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); - } -} - -#ifdef CONFIG_SMP - -/* - * These macros iterate all files on all CPUs for a given superblock. - * files_lglock must be held globally. - */ -#define do_file_list_for_each_entry(__sb, __file) \ -{ \ - int i; \ - for_each_possible_cpu(i) { \ - struct list_head *list; \ - list = per_cpu_ptr((__sb)->s_files, i); \ - list_for_each_entry((__file), list, f_u.fu_list) - -#define while_file_list_for_each_entry \ - } \ -} - -#else - -#define do_file_list_for_each_entry(__sb, __file) \ -{ \ - struct list_head *list; \ - list = &(sb)->s_files; \ - list_for_each_entry((__file), list, f_u.fu_list) - -#define while_file_list_for_each_entry \ -} - -#endif - -/** - * mark_files_ro - mark all files read-only - * @sb: superblock in question - * - * All files are marked read-only. We don't care about pending - * delete files so this should be used in 'force' mode only. - */ -void mark_files_ro(struct super_block *sb) -{ - struct file *f; - - lg_global_lock(&files_lglock); - do_file_list_for_each_entry(sb, f) { - if (!S_ISREG(file_inode(f)->i_mode)) - continue; - if (!file_count(f)) - continue; - if (!(f->f_mode & FMODE_WRITE)) - continue; - spin_lock(&f->f_lock); - f->f_mode &= ~FMODE_WRITE; - spin_unlock(&f->f_lock); - if (file_check_writeable(f) != 0) - continue; - __mnt_drop_write(f->f_path.mnt); - file_release_write(f); - } while_file_list_for_each_entry; - lg_global_unlock(&files_lglock); -} - void __init files_init(unsigned long mempages) { unsigned long n; @@ -476,6 +354,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/internal.h b/fs/internal.h index 68121584ae37..2ffa65a36ca0 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -74,9 +74,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ -extern void file_sb_list_add(struct file *f, struct super_block *sb); -extern void file_sb_list_del(struct file *f); -extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); /* diff --git a/fs/open.c b/fs/open.c index 86092bde31f4..5f129683b7d7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -674,7 +674,6 @@ static int do_dentry_open(struct file *f, } f->f_mapping = inode->i_mapping; - file_sb_list_add(f, inode->i_sb); if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; @@ -709,7 +708,6 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); - file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { if (!special_file(inode->i_mode)) { /* diff --git a/fs/super.c b/fs/super.c index e028b508db25..97280e76179c 100644 --- a/fs/super.c +++ b/fs/super.c @@ -163,19 +163,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) s = NULL; goto out; } -#ifdef CONFIG_SMP - s->s_files = alloc_percpu(struct list_head); - if (!s->s_files) - goto err_out; - else { - int i; - - for_each_possible_cpu(i) - INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); - } -#else - INIT_LIST_HEAD(&s->s_files); -#endif if (init_sb_writers(s, type)) goto err_out; s->s_flags = flags; @@ -225,10 +212,6 @@ out: return s; err_out: security_sb_free(s); -#ifdef CONFIG_SMP - if (s->s_files) - free_percpu(s->s_files); -#endif destroy_sb_writers(s); kfree(s); s = NULL; @@ -243,9 +226,6 @@ err_out: */ static inline void destroy_super(struct super_block *s) { -#ifdef CONFIG_SMP - free_percpu(s->s_files); -#endif destroy_sb_writers(s); security_sb_free(s); WARN_ON(!list_empty(&s->s_mounts)); @@ -727,7 +707,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) make sure there are no rw files opened */ if (remount_ro) { if (force) { - mark_files_ro(sb); + sb->s_readonly_remount = 1; + smp_wmb(); } else { retval = sb_prepare_remount_readonly(sb); if (retval) diff --git a/include/linux/fs.h b/include/linux/fs.h index f45857a7ac49..5c9dc8471da5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -762,12 +762,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) #define FILE_MNT_WRITE_RELEASED 2 struct file { - /* - * fu_list becomes invalid after file_free is called and queued via - * fu_rcuhead for RCU freeing - */ union { - struct list_head fu_list; struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; @@ -781,9 +776,6 @@ struct file { * Must not be taken from IRQ context. */ spinlock_t f_lock; -#ifdef CONFIG_SMP - int f_sb_list_cpu; -#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -1259,11 +1251,6 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ -#ifdef CONFIG_SMP - struct list_head __percpu *s_files; -#else - struct list_head s_files; -#endif struct list_head s_mounts; /* list of mounts; _not_ for fs use */ /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ struct list_head s_dentry_lru; /* unused dentry lru */ -- cgit v1.2.3 From ff19fc34c96fa1e6eef703eccaaf14f47cfd08ce Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 17 Apr 2015 15:04:48 -0400 Subject: config: Enable NEED_DMA_MAP_STATE by default when SWIOTLB is selected commit a6dfa128ce5c414ab46b1d690f7a1b8decb8526d upstream. A huge amount of NIC drivers use the DMA API, however if compiled under 32-bit an very important part of the DMA API can be ommitted leading to the drivers not working at all (especially if used with 'swiotlb=force iommu=soft'). As Prashant Sreedharan explains it: "the driver [tg3] uses DEFINE_DMA_UNMAP_ADDR(), dma_unmap_addr_set() to keep a copy of the dma "mapping" and dma_unmap_addr() to get the "mapping" value. On most of the platforms this is a no-op, but ... with "iommu=soft and swiotlb=force" this house keeping is required, ... otherwise we pass 0 while calling pci_unmap_/pci_dma_sync_ instead of the DMA address." As such enable this even when using 32-bit kernels. Reported-by: Ian Jackson Signed-off-by: Konrad Rzeszutek Wilk Acked-by: David S. Miller Acked-by: Prashant Sreedharan Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Michael Chan Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: cascardo@linux.vnet.ibm.com Cc: david.vrabel@citrix.com Cc: sanjeevb@broadcom.com Cc: siva.kallam@broadcom.com Cc: vyasevich@gmail.com Cc: xen-devel@lists.xensource.com Link: http://lkml.kernel.org/r/20150417190448.GA9462@l.oracle.com Signed-off-by: Ingo Molnar Cc: Ben Hutchings Signed-off-by: Greg Kroah-Hartman --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4e5b80d883c8..105ae30a176b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -154,7 +154,7 @@ config SBUS config NEED_DMA_MAP_STATE def_bool y - depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG + depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB config NEED_SG_DMA_LENGTH def_bool y -- cgit v1.2.3 From 59bc21951c78625486741b33eafdacbe57ce688d Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Wed, 24 Dec 2014 23:04:54 +0800 Subject: netfilter: nfnetlink_cthelper: Remove 'const' and '&' to avoid warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b18c5d15e8714336365d9d51782d5b53afa0443c upstream. The related code can be simplified, and also can avoid related warnings (with allmodconfig under parisc): CC [M] net/netfilter/nfnetlink_cthelper.o net/netfilter/nfnetlink_cthelper.c: In function ‘nfnl_cthelper_from_nlattr’: net/netfilter/nfnetlink_cthelper.c:97:9: warning: passing argument 1 o ‘memcpy’ discards ‘const’ qualifier from pointer target type [-Wdiscarded-array-qualifiers] memcpy(&help->data, nla_data(attr), help->helper->data_len); ^ In file included from include/linux/string.h:17:0, from include/uapi/linux/uuid.h:25, from include/linux/uuid.h:23, from include/linux/mod_devicetable.h:12, from ./arch/parisc/include/asm/hardware.h:4, from ./arch/parisc/include/asm/processor.h:15, from ./arch/parisc/include/asm/spinlock.h:6, from ./arch/parisc/include/asm/atomic.h:21, from include/linux/atomic.h:4, from ./arch/parisc/include/asm/bitops.h:12, from include/linux/bitops.h:36, from include/linux/kernel.h:10, from include/linux/list.h:8, from include/linux/module.h:9, from net/netfilter/nfnetlink_cthelper.c:11: ./arch/parisc/include/asm/string.h:8:8: note: expected ‘void *’ but argument is of type ‘const char (*)[]’ void * memcpy(void * dest,const void *src,size_t count); ^ Signed-off-by: Chen Gang Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a191b6db657e..a9da433c8149 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -83,7 +83,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, static int nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) { - const struct nf_conn_help *help = nfct_help(ct); + struct nf_conn_help *help = nfct_help(ct); if (attr == NULL) return -EINVAL; @@ -91,7 +91,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct) if (help->helper->data_len == 0) return -EINVAL; - memcpy(&help->data, nla_data(attr), help->helper->data_len); + memcpy(help->data, nla_data(attr), help->helper->data_len); return 0; } -- cgit v1.2.3 From 7c6300bb4934b8f9f881b1645c087b2ddba922f9 Mon Sep 17 00:00:00 2001 From: Ian Wilson Date: Thu, 12 Mar 2015 09:37:58 +0000 Subject: netfilter: Zero the tuple in nfnl_cthelper_parse_tuple() commit 78146572b9cd20452da47951812f35b1ad4906be upstream. nfnl_cthelper_parse_tuple() is called from nfnl_cthelper_new(), nfnl_cthelper_get() and nfnl_cthelper_del(). In each case they pass a pointer to an nf_conntrack_tuple data structure local variable: struct nf_conntrack_tuple tuple; ... ret = nfnl_cthelper_parse_tuple(&tuple, tb[NFCTH_TUPLE]); The problem is that this local variable is not initialized, and nfnl_cthelper_parse_tuple() only initializes two fields: src.l3num and dst.protonum. This leaves all other fields with undefined values based on whatever is on the stack: tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); The symptom observed was that when the rpc and tns helpers were added then traffic to port 1536 was being sent to user-space. Signed-off-by: Ian Wilson Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nfnetlink_cthelper.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index a9da433c8149..3b283edec027 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -74,6 +74,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple, if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM]) return -EINVAL; + /* Not all fields are initialized so first zero the tuple */ + memset(tuple, 0, sizeof(struct nf_conntrack_tuple)); + tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM])); tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]); -- cgit v1.2.3 From 434c32f798d372fbeff6258a1178dbeb41d960d4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 11 Sep 2013 14:20:06 -0700 Subject: include/linux/sched.h: don't use task->pid/tgid in same_thread_group/has_group_leader_pid commit e1403b8edf669ff49bbdf602cc97fefa2760cb15 upstream. task_struct->pid/tgid should go away. 1. Change same_thread_group() to use task->signal for comparison. 2. Change has_group_leader_pid(task) to compare task_pid(task) with signal->leader_pid. Signed-off-by: Oleg Nesterov Cc: Michal Hocko Cc: Sergey Dyasly Reviewed-by: "Eric W. Biederman" Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 00c1d4f45072..7cf305d036db 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2203,15 +2203,15 @@ static inline bool thread_group_leader(struct task_struct *p) * all we care about is that we have a task with the appropriate * pid, we don't actually care if we have the right task. */ -static inline int has_group_leader_pid(struct task_struct *p) +static inline bool has_group_leader_pid(struct task_struct *p) { - return p->pid == p->tgid; + return task_pid(p) == p->signal->leader_pid; } static inline -int same_thread_group(struct task_struct *p1, struct task_struct *p2) +bool same_thread_group(struct task_struct *p1, struct task_struct *p2) { - return p1->tgid == p2->tgid; + return p1->signal == p2->signal; } static inline struct task_struct *next_thread(const struct task_struct *p) -- cgit v1.2.3 From a20cc70d1e37617c25c336ec036186d369946765 Mon Sep 17 00:00:00 2001 From: Mark Grondona Date: Wed, 11 Sep 2013 14:24:31 -0700 Subject: __ptrace_may_access() should not deny sub-threads commit 73af963f9f3036dffed55c3a2898598186db1045 upstream. __ptrace_may_access() checks get_dumpable/ptrace_has_cap/etc if task != current, this can can lead to surprising results. For example, a sub-thread can't readlink("/proc/self/exe") if the executable is not readable. setup_new_exec()->would_dump() notices that inode_permission(MAY_READ) fails and then it does set_dumpable(suid_dumpable). After that get_dumpable() fails. (It is not clear why proc_pid_readlink() checks get_dumpable(), perhaps we could add PTRACE_MODE_NODUMPABLE) Change __ptrace_may_access() to use same_thread_group() instead of "task == current". Any security check is pointless when the tasks share the same ->mm. Signed-off-by: Mark Grondona Signed-off-by: Ben Woodard Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 118323bc8529..30ab20623bca 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -236,7 +236,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode) */ int dumpable = 0; /* Don't let security modules deny introspection */ - if (task == current) + if (same_thread_group(task, current)) return 0; rcu_read_lock(); tcred = __task_cred(task); -- cgit v1.2.3 From 4dce4607145619ed563859413db8374c270319f6 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 13 Apr 2015 11:48:46 +0800 Subject: ACPICA: Utilities: Cleanup to convert physical address printing formats. commit cc2080b0e5a7c6c33ef5e9ffccbc2b8f6f861393 upstream. ACPICA commit 7f06739db43a85083a70371c14141008f20b2198 For physical addresses, since the address may exceed 32-bit address range after calculation, we should use %8.8X%8.8X (see ACPI_FORMAT_UINT64()) to convert the %p formats. This is a preparation to switch acpi_physical_address to 64-bit on 32-bit kernel builds. Link: https://github.com/acpica/acpica/commit/7f06739d Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Dirk Behme [gdavis: Move tbinstall.c changes to tbutils.c due to lack of commit "42f4786 ACPICA: Split table print utilities to a new a separate file" in linux-3.10.y] Signed-off-by: George G. Davis Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/exfldio.c | 10 ++++------ drivers/acpi/acpica/hwvalid.c | 16 ++++++++-------- drivers/acpi/acpica/nsdump.c | 7 +++---- drivers/acpi/acpica/tbutils.c | 4 ++-- drivers/acpi/acpica/utaddress.c | 24 +++++++++++------------- 5 files changed, 28 insertions(+), 33 deletions(-) diff --git a/drivers/acpi/acpica/exfldio.c b/drivers/acpi/acpica/exfldio.c index c84ee956fa4c..dc210c379277 100644 --- a/drivers/acpi/acpica/exfldio.c +++ b/drivers/acpi/acpica/exfldio.c @@ -269,17 +269,15 @@ acpi_ex_access_region(union acpi_operand_object *obj_desc, } ACPI_DEBUG_PRINT_RAW((ACPI_DB_BFIELD, - " Region [%s:%X], Width %X, ByteBase %X, Offset %X at %p\n", + " Region [%s:%X], Width %X, ByteBase %X, Offset %X at %8.8X%8.8X\n", acpi_ut_get_region_name(rgn_desc->region. space_id), rgn_desc->region.space_id, obj_desc->common_field.access_byte_width, obj_desc->common_field.base_byte_offset, - field_datum_byte_offset, ACPI_CAST_PTR(void, - (rgn_desc-> - region. - address + - region_offset)))); + field_datum_byte_offset, + ACPI_FORMAT_UINT64(rgn_desc->region.address + + region_offset))); /* Invoke the appropriate address_space/op_region handler */ diff --git a/drivers/acpi/acpica/hwvalid.c b/drivers/acpi/acpica/hwvalid.c index eab70d58852a..fae57584a182 100644 --- a/drivers/acpi/acpica/hwvalid.c +++ b/drivers/acpi/acpica/hwvalid.c @@ -142,17 +142,17 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width) byte_width = ACPI_DIV_8(bit_width); last_address = address + byte_width - 1; - ACPI_DEBUG_PRINT((ACPI_DB_IO, "Address %p LastAddress %p Length %X", - ACPI_CAST_PTR(void, address), ACPI_CAST_PTR(void, - last_address), - byte_width)); + ACPI_DEBUG_PRINT((ACPI_DB_IO, + "Address %8.8X%8.8X LastAddress %8.8X%8.8X Length %X", + ACPI_FORMAT_UINT64(address), + ACPI_FORMAT_UINT64(last_address), byte_width)); /* Maximum 16-bit address in I/O space */ if (last_address > ACPI_UINT16_MAX) { ACPI_ERROR((AE_INFO, - "Illegal I/O port address/length above 64K: %p/0x%X", - ACPI_CAST_PTR(void, address), byte_width)); + "Illegal I/O port address/length above 64K: %8.8X%8.8X/0x%X", + ACPI_FORMAT_UINT64(address), byte_width)); return_ACPI_STATUS(AE_LIMIT); } @@ -181,8 +181,8 @@ acpi_hw_validate_io_request(acpi_io_address address, u32 bit_width) if (acpi_gbl_osi_data >= port_info->osi_dependency) { ACPI_DEBUG_PRINT((ACPI_DB_IO, - "Denied AML access to port 0x%p/%X (%s 0x%.4X-0x%.4X)", - ACPI_CAST_PTR(void, address), + "Denied AML access to port 0x%8.8X%8.8X/%X (%s 0x%.4X-0x%.4X)", + ACPI_FORMAT_UINT64(address), byte_width, port_info->name, port_info->start, port_info->end)); diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index ce6e97326205..d6651e701a71 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -258,12 +258,11 @@ acpi_ns_dump_one_object(acpi_handle obj_handle, switch (type) { case ACPI_TYPE_PROCESSOR: - acpi_os_printf("ID %02X Len %02X Addr %p\n", + acpi_os_printf("ID %02X Len %02X Addr %8.8X%8.8X\n", obj_desc->processor.proc_id, obj_desc->processor.length, - ACPI_CAST_PTR(void, - obj_desc->processor. - address)); + ACPI_FORMAT_UINT64(obj_desc->processor. + address)); break; case ACPI_TYPE_DEVICE: diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index ce3d5db39a9c..45c4474c35b6 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -474,8 +474,8 @@ acpi_tb_install_table(acpi_physical_address address, table = acpi_os_map_memory(address, sizeof(struct acpi_table_header)); if (!table) { ACPI_ERROR((AE_INFO, - "Could not map memory for table [%s] at %p", - signature, ACPI_CAST_PTR(void, address))); + "Could not map memory for table [%s] at %8.8X%8.8X", + signature, ACPI_FORMAT_UINT64(address))); return; } diff --git a/drivers/acpi/acpica/utaddress.c b/drivers/acpi/acpica/utaddress.c index e0a2e2779c2e..3c7770d75773 100644 --- a/drivers/acpi/acpica/utaddress.c +++ b/drivers/acpi/acpica/utaddress.c @@ -107,10 +107,10 @@ acpi_ut_add_address_range(acpi_adr_space_type space_id, acpi_gbl_address_range_list[space_id] = range_info; ACPI_DEBUG_PRINT((ACPI_DB_NAMES, - "\nAdded [%4.4s] address range: 0x%p-0x%p\n", + "\nAdded [%4.4s] address range: 0x%8.8X%8.8X-0x%8.8X%8.8X\n", acpi_ut_get_node_name(range_info->region_node), - ACPI_CAST_PTR(void, address), - ACPI_CAST_PTR(void, range_info->end_address))); + ACPI_FORMAT_UINT64(address), + ACPI_FORMAT_UINT64(range_info->end_address))); (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(AE_OK); @@ -160,15 +160,13 @@ acpi_ut_remove_address_range(acpi_adr_space_type space_id, } ACPI_DEBUG_PRINT((ACPI_DB_NAMES, - "\nRemoved [%4.4s] address range: 0x%p-0x%p\n", + "\nRemoved [%4.4s] address range: 0x%8.8X%8.8X-0x%8.8X%8.8X\n", acpi_ut_get_node_name(range_info-> region_node), - ACPI_CAST_PTR(void, - range_info-> - start_address), - ACPI_CAST_PTR(void, - range_info-> - end_address))); + ACPI_FORMAT_UINT64(range_info-> + start_address), + ACPI_FORMAT_UINT64(range_info-> + end_address))); ACPI_FREE(range_info); return_VOID; @@ -244,9 +242,9 @@ acpi_ut_check_address_range(acpi_adr_space_type space_id, region_node); ACPI_WARNING((AE_INFO, - "0x%p-0x%p %s conflicts with Region %s %d", - ACPI_CAST_PTR(void, address), - ACPI_CAST_PTR(void, end_address), + "0x%8.8X%8.8X-0x%8.8X%8.8X %s conflicts with Region %s %d", + ACPI_FORMAT_UINT64(address), + ACPI_FORMAT_UINT64(end_address), acpi_ut_get_region_name(space_id), pathname, overlap_count)); ACPI_FREE(pathname); -- cgit v1.2.3 From a2dac7a1d62f940989d3695d44d04afef4d291f6 Mon Sep 17 00:00:00 2001 From: Lv Zheng Date: Mon, 13 Apr 2015 11:48:52 +0800 Subject: ACPICA: Utilities: Cleanup to remove useless ACPI_PRINTF/FORMAT_xxx helpers. commit 1d0a0b2f6df2bf2643fadc990eb143361eca6ada upstream. ACPICA commit b60612373a4ef63b64a57c124576d7ddb6d8efb6 For physical addresses, since the address may exceed 32-bit address range after calculation, we should use 0x%8.8X%8.8X instead of ACPI_PRINTF_UINT and ACPI_FORMAT_UINT64() instead of ACPI_FORMAT_NATIVE_UINT()/ACPI_FORMAT_TO_UINT(). This patch also removes above replaced macros as there are no users. This is a preparation to switch acpi_physical_address to 64-bit on 32-bit kernel builds. Link: https://github.com/acpica/acpica/commit/b6061237 Signed-off-by: Lv Zheng Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Dirk Behme [gdavis: Move tbprint.c changes to tbutils.c due to lack of commit "42f4786 ACPICA: Split table print utilities to a new a separate file" in linux-3.10.y] Signed-off-by: George G. Davis Signed-off-by: Greg Kroah-Hartman --- drivers/acpi/acpica/acmacros.h | 10 +++------- drivers/acpi/acpica/dsopcode.c | 4 ++-- drivers/acpi/acpica/evregion.c | 2 +- drivers/acpi/acpica/exdump.c | 4 ++-- drivers/acpi/acpica/exregion.c | 8 +++----- drivers/acpi/acpica/nsdump.c | 5 +++-- drivers/acpi/acpica/tbutils.c | 16 ++++++---------- 7 files changed, 20 insertions(+), 29 deletions(-) diff --git a/drivers/acpi/acpica/acmacros.h b/drivers/acpi/acpica/acmacros.h index 53666bd9193d..32b0bf32364a 100644 --- a/drivers/acpi/acpica/acmacros.h +++ b/drivers/acpi/acpica/acmacros.h @@ -63,19 +63,15 @@ #define ACPI_SET64(ptr, val) (*ACPI_CAST64 (ptr) = (u64) (val)) /* - * printf() format helpers + * printf() format helper. This macros is a workaround for the difficulties + * with emitting 64-bit integers and 64-bit pointers with the same code + * for both 32-bit and 64-bit hosts. */ /* Split 64-bit integer into two 32-bit values. Use with %8.8X%8.8X */ #define ACPI_FORMAT_UINT64(i) ACPI_HIDWORD(i), ACPI_LODWORD(i) -#if ACPI_MACHINE_WIDTH == 64 -#define ACPI_FORMAT_NATIVE_UINT(i) ACPI_FORMAT_UINT64(i) -#else -#define ACPI_FORMAT_NATIVE_UINT(i) 0, (i) -#endif - /* * Macros for moving data around to/from buffers that are possibly unaligned. * If the hardware supports the transfer of unaligned data, just do the store. diff --git a/drivers/acpi/acpica/dsopcode.c b/drivers/acpi/acpica/dsopcode.c index e9b13b92ba1e..46a37aeaedae 100644 --- a/drivers/acpi/acpica/dsopcode.c +++ b/drivers/acpi/acpica/dsopcode.c @@ -446,7 +446,7 @@ acpi_ds_eval_region_operands(struct acpi_walk_state *walk_state, ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "RgnObj %p Addr %8.8X%8.8X Len %X\n", obj_desc, - ACPI_FORMAT_NATIVE_UINT(obj_desc->region.address), + ACPI_FORMAT_UINT64(obj_desc->region.address), obj_desc->region.length)); /* Now the address and length are valid for this opregion */ @@ -544,7 +544,7 @@ acpi_ds_eval_table_region_operands(struct acpi_walk_state *walk_state, ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "RgnObj %p Addr %8.8X%8.8X Len %X\n", obj_desc, - ACPI_FORMAT_NATIVE_UINT(obj_desc->region.address), + ACPI_FORMAT_UINT64(obj_desc->region.address), obj_desc->region.length)); /* Now the address and length are valid for this opregion */ diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 8fab9262d98a..ad698893e829 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -276,7 +276,7 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, ACPI_DEBUG_PRINT((ACPI_DB_OPREGION, "Handler %p (@%p) Address %8.8X%8.8X [%s]\n", ®ion_obj->region.handler->address_space, handler, - ACPI_FORMAT_NATIVE_UINT(address), + ACPI_FORMAT_UINT64(address), acpi_ut_get_region_name(region_obj->region. space_id))); diff --git a/drivers/acpi/acpica/exdump.c b/drivers/acpi/acpica/exdump.c index e5a3c249f7fa..7e6a56fe1d6e 100644 --- a/drivers/acpi/acpica/exdump.c +++ b/drivers/acpi/acpica/exdump.c @@ -621,8 +621,8 @@ void acpi_ex_dump_operand(union acpi_operand_object *obj_desc, u32 depth) acpi_os_printf("\n"); } else { acpi_os_printf(" base %8.8X%8.8X Length %X\n", - ACPI_FORMAT_NATIVE_UINT(obj_desc->region. - address), + ACPI_FORMAT_UINT64(obj_desc->region. + address), obj_desc->region.length); } break; diff --git a/drivers/acpi/acpica/exregion.c b/drivers/acpi/acpica/exregion.c index 182abaf045e1..e90c59d35a16 100644 --- a/drivers/acpi/acpica/exregion.c +++ b/drivers/acpi/acpica/exregion.c @@ -176,7 +176,7 @@ acpi_ex_system_memory_space_handler(u32 function, if (!mem_info->mapped_logical_address) { ACPI_ERROR((AE_INFO, "Could not map memory at 0x%8.8X%8.8X, size %u", - ACPI_FORMAT_NATIVE_UINT(address), + ACPI_FORMAT_UINT64(address), (u32) map_length)); mem_info->mapped_length = 0; return_ACPI_STATUS(AE_NO_MEMORY); @@ -197,8 +197,7 @@ acpi_ex_system_memory_space_handler(u32 function, ACPI_DEBUG_PRINT((ACPI_DB_INFO, "System-Memory (width %u) R/W %u Address=%8.8X%8.8X\n", - bit_width, function, - ACPI_FORMAT_NATIVE_UINT(address))); + bit_width, function, ACPI_FORMAT_UINT64(address))); /* * Perform the memory read or write @@ -300,8 +299,7 @@ acpi_ex_system_io_space_handler(u32 function, ACPI_DEBUG_PRINT((ACPI_DB_INFO, "System-IO (width %u) R/W %u Address=%8.8X%8.8X\n", - bit_width, function, - ACPI_FORMAT_NATIVE_UINT(address))); + bit_width, function, ACPI_FORMAT_UINT64(address))); /* Decode the function parameter */ diff --git a/drivers/acpi/acpica/nsdump.c b/drivers/acpi/acpica/nsdump.c index d6651e701a71..20ae5b9bb9f2 100644 --- a/drivers/acpi/acpica/nsdump.c +++ b/drivers/acpi/acpica/nsdump.c @@ -333,8 +333,9 @@ acpi_ns_dump_one_object(acpi_handle obj_handle, space_id)); if (obj_desc->region.flags & AOPOBJ_DATA_VALID) { acpi_os_printf(" Addr %8.8X%8.8X Len %.4X\n", - ACPI_FORMAT_NATIVE_UINT - (obj_desc->region.address), + ACPI_FORMAT_UINT64(obj_desc-> + region. + address), obj_desc->region.length); } else { acpi_os_printf diff --git a/drivers/acpi/acpica/tbutils.c b/drivers/acpi/acpica/tbutils.c index 45c4474c35b6..5c67b2840c58 100644 --- a/drivers/acpi/acpica/tbutils.c +++ b/drivers/acpi/acpica/tbutils.c @@ -246,16 +246,12 @@ acpi_tb_print_table_header(acpi_physical_address address, { struct acpi_table_header local_header; - /* - * The reason that the Address is cast to a void pointer is so that we - * can use %p which will work properly on both 32-bit and 64-bit hosts. - */ if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_FACS)) { /* FACS only has signature and length fields */ - ACPI_INFO((AE_INFO, "%4.4s %p %05X", - header->signature, ACPI_CAST_PTR(void, address), + ACPI_INFO((AE_INFO, "%4.4s 0x%8.8X%8.8X %05X", + header->signature, ACPI_FORMAT_UINT64(address), header->length)); } else if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_RSDP)) { @@ -266,8 +262,8 @@ acpi_tb_print_table_header(acpi_physical_address address, header)->oem_id, ACPI_OEM_ID_SIZE); acpi_tb_fix_string(local_header.oem_id, ACPI_OEM_ID_SIZE); - ACPI_INFO((AE_INFO, "RSDP %p %05X (v%.2d %6.6s)", - ACPI_CAST_PTR (void, address), + ACPI_INFO((AE_INFO, "RSDP 0x%8.8X%8.8X %05X (v%.2d %6.6s)", + ACPI_FORMAT_UINT64(address), (ACPI_CAST_PTR(struct acpi_table_rsdp, header)-> revision > 0) ? ACPI_CAST_PTR(struct acpi_table_rsdp, @@ -281,8 +277,8 @@ acpi_tb_print_table_header(acpi_physical_address address, acpi_tb_cleanup_table_header(&local_header, header); ACPI_INFO((AE_INFO, - "%4.4s %p %05X (v%.2d %6.6s %8.8s %08X %4.4s %08X)", - local_header.signature, ACPI_CAST_PTR(void, address), + "%-4.4s 0x%8.8X%8.8X %05X (v%.2d %-6.6s %-8.8s %08X %-4.4s %08X)", + local_header.signature, ACPI_FORMAT_UINT64(address), local_header.length, local_header.revision, local_header.oem_id, local_header.oem_table_id, local_header.oem_revision, -- cgit v1.2.3 From 29e7fa7cb70a5f4b0e934f80949d1f1c6a6dc0a6 Mon Sep 17 00:00:00 2001 From: Jim Snow Date: Tue, 18 Nov 2014 14:51:09 +0100 Subject: sb_edac: Fix erroneous bytes->gigabytes conversion commit 8c009100295597f23978c224aec5751a365bc965 upstream. Signed-off-by: Jim Snow Signed-off-by: Lukasz Anaczkowski Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Jiri Slaby Cc: Vinson Lee Signed-off-by: Greg Kroah-Hartman --- drivers/edac/sb_edac.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index f505e4ca6d58..3bdefbfb4377 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -623,7 +623,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) u32 reg; u64 limit, prv = 0; u64 tmp_mb; - u32 mb, kb; + u32 gb, mb; u32 rir_way; /* @@ -636,8 +636,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pvt->tolm = GET_TOLM(reg); tmp_mb = (1 + pvt->tolm) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tolm); + gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "TOLM: %u.%03u GB (0x%016Lx)\n", + gb, (mb*1000)/1024, (u64)pvt->tolm); /* Address range is already 45:25 */ pci_read_config_dword(pvt->pci_sad1, TOHM, @@ -645,8 +646,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) pvt->tohm = GET_TOHM(reg); tmp_mb = (1 + pvt->tohm) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); + gb = div_u64_rem(tmp_mb, 1024, &mb); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", + gb, (mb*1000)/1024, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list @@ -668,11 +670,11 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "SAD#%d %s up to %u.%03u GB (0x%016Lx) Interleave: %s reg=0x%08x\n", n_sads, get_dram_attr(reg), - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, INTERLEAVE_MODE(reg) ? "8:6" : "[8:6]XOR[18:16]", reg); @@ -702,9 +704,9 @@ static void get_memory_layout(const struct mem_ctl_info *mci) break; tmp_mb = (limit + 1) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n", - n_tads, mb, kb, + n_tads, gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, (u32)TAD_SOCK(reg), (u32)TAD_CH(reg), @@ -727,10 +729,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tad_ch_nilv_offset[j], ®); tmp_mb = TAD_OFFSET(reg) >> 20; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n", i, j, - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, reg); } @@ -752,10 +754,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = RIR_LIMIT(reg) >> 20; rir_way = 1 << RIR_WAY(reg); - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n", i, j, - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, rir_way, reg); @@ -766,10 +768,10 @@ static void get_memory_layout(const struct mem_ctl_info *mci) ®); tmp_mb = RIR_OFFSET(reg) << 6; - mb = div_u64_rem(tmp_mb, 1000, &kb); + gb = div_u64_rem(tmp_mb, 1024, &mb); edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n", i, j, k, - mb, kb, + gb, (mb*1000)/1024, ((u64)tmp_mb) << 20L, (u32)RIR_RNK_TGT(reg), reg); @@ -806,7 +808,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, u8 ch_way,sck_way; u32 tad_offset; u32 rir_way; - u32 mb, kb; + u32 mb, gb; u64 ch_addr, offset, limit, prv = 0; @@ -1022,10 +1024,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, continue; limit = RIR_LIMIT(reg); - mb = div_u64_rem(limit >> 20, 1000, &kb); + gb = div_u64_rem(limit >> 20, 1024, &mb); edac_dbg(0, "RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n", n_rir, - mb, kb, + gb, (mb*1000)/1024, limit, 1 << RIR_WAY(reg)); if (ch_addr <= limit) -- cgit v1.2.3 From 6a82524ebeb75562563192218e01462fb60c68f4 Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Thu, 14 Aug 2014 16:12:39 +0200 Subject: hpsa: refine the pci enable/disable handling commit 132aa220b45d60e9b20def1e9d8be9422eed9616 upstream. When a second(kdump) kernel starts and the hard reset method is used the driver calls pci_disable_device without previously enabling it, so the kernel shows a warning - [ 16.876248] WARNING: at drivers/pci/pci.c:1431 pci_disable_device+0x84/0x90() [ 16.882686] Device hpsa disabling already-disabled device ... This patch fixes it, in addition to this I tried to balance also some other pairs of enable/disable device in the driver. Unfortunately I wasn't able to verify the functionality for the case of a sw reset, because of a lack of proper hw. Signed-off-by: Tomas Henzl Reviewed-by: Stephen M. Cameron Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 62ed744bbe06..28b6a2fde0ac 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -3898,10 +3898,6 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev) /* Save the PCI command register */ pci_read_config_word(pdev, 4, &command_register); - /* Turn the board off. This is so that later pci_restore_state() - * won't turn the board on before the rest of config space is ready. - */ - pci_disable_device(pdev); pci_save_state(pdev); /* find the first memory BAR, so we can find the cfg table */ @@ -3949,11 +3945,6 @@ static int hpsa_kdump_hard_reset_controller(struct pci_dev *pdev) goto unmap_cfgtable; pci_restore_state(pdev); - rc = pci_enable_device(pdev); - if (rc) { - dev_warn(&pdev->dev, "failed to enable device.\n"); - goto unmap_cfgtable; - } pci_write_config_word(pdev, 4, command_register); /* Some devices (notably the HP Smart Array 5i Controller) @@ -4448,6 +4439,23 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev) if (!reset_devices) return 0; + /* kdump kernel is loading, we don't know in which state is + * the pci interface. The dev->enable_cnt is equal zero + * so we call enable+disable, wait a while and switch it on. + */ + rc = pci_enable_device(pdev); + if (rc) { + dev_warn(&pdev->dev, "Failed to enable PCI device\n"); + return -ENODEV; + } + pci_disable_device(pdev); + msleep(260); /* a randomly chosen number */ + rc = pci_enable_device(pdev); + if (rc) { + dev_warn(&pdev->dev, "failed to enable device.\n"); + return -ENODEV; + } + /* Reset the controller with a PCI power-cycle or via doorbell */ rc = hpsa_kdump_hard_reset_controller(pdev); @@ -4456,10 +4464,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev) * "performant mode". Or, it might be 640x, which can't reset * due to concerns about shared bbwc between 6402/6404 pair. */ - if (rc == -ENOTSUPP) - return rc; /* just try to do the kdump anyhow. */ - if (rc) - return -ENODEV; + if (rc) { + if (rc != -ENOTSUPP) /* just try to do the kdump anyhow. */ + rc = -ENODEV; + goto out_disable; + } /* Now try to get the controller to respond to a no-op */ dev_warn(&pdev->dev, "Waiting for controller to respond to no-op\n"); @@ -4470,7 +4479,11 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev) dev_warn(&pdev->dev, "no-op failed%s\n", (i < 11 ? "; re-trying" : "")); } - return 0; + +out_disable: + + pci_disable_device(pdev); + return rc; } static int hpsa_allocate_cmd_pool(struct ctlr_info *h) @@ -4613,6 +4626,7 @@ static void hpsa_undo_allocations_after_kdump_soft_reset(struct ctlr_info *h) iounmap(h->transtable); if (h->cfgtable) iounmap(h->cfgtable); + pci_disable_device(h->pdev); pci_release_regions(h->pdev); kfree(h); } -- cgit v1.2.3 From 31884f5463499bf023772ce94b003d40b5db08fc Mon Sep 17 00:00:00 2001 From: Tomas Henzl Date: Fri, 12 Sep 2014 14:44:15 +0200 Subject: hpsa: add missing pci_set_master in kdump path commit 859c75aba20264d87dd026bab0d0ca3bff385955 upstream. Add a call to pci_set_master(...) missing in the previous patch "hpsa: refine the pci enable/disable handling". Found thanks to Rob Elliot. Signed-off-by: Tomas Henzl Reviewed-by: Robert Elliott Tested-by: Robert Elliott Signed-off-by: Christoph Hellwig Signed-off-by: Greg Kroah-Hartman --- drivers/scsi/hpsa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 28b6a2fde0ac..a6cdf17e27dc 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -4455,7 +4455,7 @@ static int hpsa_init_reset_devices(struct pci_dev *pdev) dev_warn(&pdev->dev, "failed to enable device.\n"); return -ENODEV; } - + pci_set_master(pdev); /* Reset the controller with a PCI power-cycle or via doorbell */ rc = hpsa_kdump_hard_reset_controller(pdev); -- cgit v1.2.3 From 9eae8ac6ab40b896b472c526afe7847e798f4f36 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Sun, 19 Apr 2015 02:48:39 +0200 Subject: fs: take i_mutex during prepare_binprm for set[ug]id executables commit 8b01fc86b9f425899f8a3a8fc1c47d73c2c20543 upstream. This prevents a race between chown() and execve(), where chowning a setuid-user binary to root would momentarily make the binary setuid root. This patch was mostly written by Linus Torvalds. Signed-off-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Charles Williams Signed-off-by: Jiri Slaby Signed-off-by: Sheng Yong Signed-off-by: Greg Kroah-Hartman --- fs/exec.c | 76 ++++++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index dd6aa61c8548..acbd7ac2deda 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1265,6 +1265,53 @@ static int check_unsafe_exec(struct linux_binprm *bprm) return res; } +static void bprm_fill_uid(struct linux_binprm *bprm) +{ + struct inode *inode; + unsigned int mode; + kuid_t uid; + kgid_t gid; + + /* clear any previous set[ug]id data from a previous binary */ + bprm->cred->euid = current_euid(); + bprm->cred->egid = current_egid(); + + if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) + return; + + if (current->no_new_privs) + return; + + inode = file_inode(bprm->file); + mode = ACCESS_ONCE(inode->i_mode); + if (!(mode & (S_ISUID|S_ISGID))) + return; + + /* Be careful if suid/sgid is set */ + mutex_lock(&inode->i_mutex); + + /* reload atomically mode/uid/gid now that lock held */ + mode = inode->i_mode; + uid = inode->i_uid; + gid = inode->i_gid; + mutex_unlock(&inode->i_mutex); + + /* We ignore suid/sgid if there are no mappings for them in the ns */ + if (!kuid_has_mapping(bprm->cred->user_ns, uid) || + !kgid_has_mapping(bprm->cred->user_ns, gid)) + return; + + if (mode & S_ISUID) { + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->euid = uid; + } + + if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { + bprm->per_clear |= PER_CLEAR_ON_SETID; + bprm->cred->egid = gid; + } +} + /* * Fill the binprm structure from the inode. * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes @@ -1273,39 +1320,12 @@ static int check_unsafe_exec(struct linux_binprm *bprm) */ int prepare_binprm(struct linux_binprm *bprm) { - umode_t mode; - struct inode * inode = file_inode(bprm->file); int retval; - mode = inode->i_mode; if (bprm->file->f_op == NULL) return -EACCES; - /* clear any previous set[ug]id data from a previous binary */ - bprm->cred->euid = current_euid(); - bprm->cred->egid = current_egid(); - - if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !current->no_new_privs && - kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && - kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { - /* Set-uid? */ - if (mode & S_ISUID) { - bprm->per_clear |= PER_CLEAR_ON_SETID; - bprm->cred->euid = inode->i_uid; - } - - /* Set-gid? */ - /* - * If setgid is set but no group execute bit then this - * is a candidate for mandatory locking, not a setgid - * executable. - */ - if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - bprm->per_clear |= PER_CLEAR_ON_SETID; - bprm->cred->egid = inode->i_gid; - } - } + bprm_fill_uid(bprm); /* fill in binprm security blob */ retval = security_bprm_set_creds(bprm); -- cgit v1.2.3 From 66bd44e6a5c8f24805f94d19f943301d7a7f418e Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 3 Feb 2015 13:00:22 +0100 Subject: x86/microcode/intel: Guard against stack overflow in the loader commit f84598bd7c851f8b0bf8cd0d7c3be0d73c432ff4 upstream. mc_saved_tmp is a static array allocated on the stack, we need to make sure mc_saved_count stays within its bounds, otherwise we're overflowing the stack in _save_mc(). A specially crafted microcode header could lead to a kernel crash or potentially kernel execution. Signed-off-by: Quentin Casasnovas Cc: "H. Peter Anvin" Cc: Fenghua Yu Link: http://lkml.kernel.org/r/1422964824-22056-1-git-send-email-quentin.casasnovas@oracle.com Signed-off-by: Borislav Petkov Signed-off-by: Jiri Slaby Signed-off-by: Sheng Yong Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/microcode_intel_early.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c index 2e9e12871c2b..a883942aee44 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/microcode_intel_early.c @@ -321,7 +321,7 @@ get_matching_model_microcode(int cpu, unsigned long start, unsigned int mc_saved_count = mc_saved_data->mc_saved_count; int i; - while (leftover) { + while (leftover && mc_saved_count < ARRAY_SIZE(mc_saved_tmp)) { mc_header = (struct microcode_header_intel *)ucode_ptr; mc_size = get_totalsize(mc_header); -- cgit v1.2.3 From 43f2e3615d181a7028ab797114c5960977669b2a Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sun, 9 Nov 2014 08:38:39 +0000 Subject: Btrfs: make xattr replace operations atomic commit 5f5bc6b1e2d5a6f827bc860ef2dc5b6f365d1339 upstream. Replacing a xattr consists of doing a lookup for its existing value, delete the current value from the respective leaf, release the search path and then finally insert the new value. This leaves a time window where readers (getxattr, listxattrs) won't see any value for the xattr. Xattrs are used to store ACLs, so this has security implications. This change also fixes 2 other existing issues which were: *) Deleting the old xattr value without verifying first if the new xattr will fit in the existing leaf item (in case multiple xattrs are packed in the same item due to name hash collision); *) Returning -EEXIST when the flag XATTR_CREATE is given and the xattr doesn't exist but we have have an existing item that packs muliple xattrs with the same name hash as the input xattr. In this case we should return ENOSPC. A test case for xfstests follows soon. Thanks to Alexandre Oliva for reporting the non-atomicity of the xattr replace implementation. Reported-by: Alexandre Oliva Signed-off-by: Filipe Manana Signed-off-by: Chris Mason [shengyong: backport to 3.10 - FIX: CVE-2014-9710 - adjust context - ASSERT() was added v3.12, so we do check with if statement - set the first parameter of btrfs_item_nr() as NULL, because it is not used, and is removed in v3.13 ] Signed-off-by: Sheng Yong Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ctree.c | 2 +- fs/btrfs/ctree.h | 5 ++ fs/btrfs/dir-item.c | 10 +--- fs/btrfs/xattr.c | 159 +++++++++++++++++++++++++++++++++------------------- 4 files changed, 111 insertions(+), 65 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7fb054ba1b60..82f14a1da542 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2769,7 +2769,7 @@ done: */ if (!p->leave_spinning) btrfs_set_path_blocking(p); - if (ret < 0) + if (ret < 0 && !p->skip_release_on_error) btrfs_release_path(p); return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d6dd49b51ba8..c19444e412be 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -586,6 +586,7 @@ struct btrfs_path { unsigned int skip_locking:1; unsigned int leave_spinning:1; unsigned int search_commit_root:1; + unsigned int skip_release_on_error:1; }; /* @@ -3406,6 +3407,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, int verify_dir_item(struct btrfs_root *root, struct extent_buffer *leaf, struct btrfs_dir_item *dir_item); +struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, + const char *name, + int name_len); /* orphan.c */ int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 79e594e341c7..6f61b9b1526f 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -21,10 +21,6 @@ #include "hash.h" #include "transaction.h" -static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, - struct btrfs_path *path, - const char *name, int name_len); - /* * insert a name into a directory, doing overflow properly if there is a hash * collision. data_size indicates how big the item inserted should be. On @@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans, * this walks through all the entries in a dir item and finds one * for a specific name. */ -static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, - struct btrfs_path *path, - const char *name, int name_len) +struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root, + struct btrfs_path *path, + const char *name, int name_len) { struct btrfs_dir_item *dir_item; unsigned long name_ptr; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 05740b9789e4..9cf20d63cc99 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -27,6 +27,7 @@ #include "transaction.h" #include "xattr.h" #include "disk-io.h" +#include "locking.h" ssize_t __btrfs_getxattr(struct inode *inode, const char *name, @@ -89,7 +90,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const void *value, size_t size, int flags) { - struct btrfs_dir_item *di; + struct btrfs_dir_item *di = NULL; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; size_t name_len = strlen(name); @@ -101,84 +102,128 @@ static int do_setxattr(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->skip_release_on_error = 1; + + if (!value) { + di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), + name, name_len, -1); + if (!di && (flags & XATTR_REPLACE)) + ret = -ENODATA; + else if (di) + ret = btrfs_delete_one_dir_name(trans, root, path, di); + goto out; + } + /* + * For a replace we can't just do the insert blindly. + * Do a lookup first (read-only btrfs_search_slot), and return if xattr + * doesn't exist. If it exists, fall down below to the insert/replace + * path - we can't race with a concurrent xattr delete, because the VFS + * locks the inode's i_mutex before calling setxattr or removexattr. + */ if (flags & XATTR_REPLACE) { - di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, - name_len, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } else if (!di) { + if(!mutex_is_locked(&inode->i_mutex)) { + pr_err("BTRFS: assertion failed: %s, file: %s, line: %d", + "mutex_is_locked(&inode->i_mutex)", __FILE__, + __LINE__); + BUG(); + } + di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), + name, name_len, 0); + if (!di) { ret = -ENODATA; goto out; } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - if (ret) - goto out; btrfs_release_path(path); + di = NULL; + } + ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), + name, name_len, value, size); + if (ret == -EOVERFLOW) { /* - * remove the attribute + * We have an existing item in a leaf, split_leaf couldn't + * expand it. That item might have or not a dir_item that + * matches our target xattr, so lets check. */ - if (!value) - goto out; - } else { - di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), - name, name_len, 0); - if (IS_ERR(di)) { - ret = PTR_ERR(di); + ret = 0; + btrfs_assert_tree_locked(path->nodes[0]); + di = btrfs_match_dir_item_name(root, path, name, name_len); + if (!di && !(flags & XATTR_REPLACE)) { + ret = -ENOSPC; goto out; } - if (!di && !value) - goto out; - btrfs_release_path(path); + } else if (ret == -EEXIST) { + ret = 0; + di = btrfs_match_dir_item_name(root, path, name, name_len); + if(!di) { /* logic error */ + pr_err("BTRFS: assertion failed: %s, file: %s, line: %d", + "di", __FILE__, __LINE__); + BUG(); + } + } else if (ret) { + goto out; } -again: - ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), - name, name_len, value, size); - /* - * If we're setting an xattr to a new value but the new value is say - * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting - * back from split_leaf. This is because it thinks we'll be extending - * the existing item size, but we're asking for enough space to add the - * item itself. So if we get EOVERFLOW just set ret to EEXIST and let - * the rest of the function figure it out. - */ - if (ret == -EOVERFLOW) + if (di && (flags & XATTR_CREATE)) { ret = -EEXIST; + goto out; + } - if (ret == -EEXIST) { - if (flags & XATTR_CREATE) - goto out; + if (di) { /* - * We can't use the path we already have since we won't have the - * proper locking for a delete, so release the path and - * re-lookup to delete the thing. + * We're doing a replace, and it must be atomic, that is, at + * any point in time we have either the old or the new xattr + * value in the tree. We don't want readers (getxattr and + * listxattrs) to miss a value, this is specially important + * for ACLs. */ - btrfs_release_path(path); - di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), - name, name_len, -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } else if (!di) { - /* Shouldn't happen but just in case... */ - btrfs_release_path(path); - goto again; + const int slot = path->slots[0]; + struct extent_buffer *leaf = path->nodes[0]; + const u16 old_data_len = btrfs_dir_data_len(leaf, di); + const u32 item_size = btrfs_item_size_nr(leaf, slot); + const u32 data_size = sizeof(*di) + name_len + size; + struct btrfs_item *item; + unsigned long data_ptr; + char *ptr; + + if (size > old_data_len) { + if (btrfs_leaf_free_space(root, leaf) < + (size - old_data_len)) { + ret = -ENOSPC; + goto out; + } } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - if (ret) - goto out; + if (old_data_len + name_len + sizeof(*di) == item_size) { + /* No other xattrs packed in the same leaf item. */ + if (size > old_data_len) + btrfs_extend_item(root, path, + size - old_data_len); + else if (size < old_data_len) + btrfs_truncate_item(root, path, data_size, 1); + } else { + /* There are other xattrs packed in the same item. */ + ret = btrfs_delete_one_dir_name(trans, root, path, di); + if (ret) + goto out; + btrfs_extend_item(root, path, data_size); + } + item = btrfs_item_nr(NULL, slot); + ptr = btrfs_item_ptr(leaf, slot, char); + ptr += btrfs_item_size(leaf, item) - data_size; + di = (struct btrfs_dir_item *)ptr; + btrfs_set_dir_data_len(leaf, di, size); + data_ptr = ((unsigned long)(di + 1)) + name_len; + write_extent_buffer(leaf, value, data_ptr, size); + btrfs_mark_buffer_dirty(leaf); + } else { /* - * We have a value to set, so go back and try to insert it now. + * Insert, and we had space for the xattr, so path->slots[0] is + * where our xattr dir_item is and btrfs_insert_xattr_item() + * filled it. */ - if (value) { - btrfs_release_path(path); - goto again; - } } out: btrfs_free_path(path); -- cgit v1.2.3 From 528555d05bde69c436bb0126ea95635da24d889e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 25 Oct 2013 10:21:32 +0200 Subject: xfrm: Increase the garbage collector threshold commit eeb1b73378b560e00ff1da2ef09fed9254f4e128 upstream. With the removal of the routing cache, we lost the option to tweak the garbage collector threshold along with the maximum routing cache size. So git commit 703fb94ec ("xfrm: Fix the gc threshold value for ipv4") moved back to a static threshold. It turned out that the current threshold before we start garbage collecting is much to small for some workloads, so increase it from 1024 to 32768. This means that we start the garbage collector if we have more than 32768 dst entries in the system and refuse new allocations if we are above 65536. Reported-by: Wolfgang Walter Signed-off-by: Steffen Klassert Cc: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 9a459be24af7..9b5b5ddf8cd4 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -235,7 +235,7 @@ static struct dst_ops xfrm4_dst_ops = { .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 23ed03d786c8..1c2e0c9ba8a1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -284,7 +284,7 @@ static struct dst_ops xfrm6_dst_ops = { .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = 1024, + .gc_thresh = 32768, }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { -- cgit v1.2.3 From ba310bc86f54169bf4ec414683bd61dcd2f8675f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 1 Aug 2013 10:04:14 +0200 Subject: ipv6: prevent fib6_run_gc() contention commit 2ac3ac8f86f2fe065d746d9a9abaca867adec577 upstream. On a high-traffic router with many processors and many IPv6 dst entries, soft lockup in fib6_run_gc() can occur when number of entries reaches gc_thresh. This happens because fib6_run_gc() uses fib6_gc_lock to allow only one thread to run the garbage collector but ip6_dst_gc() doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc() returns. On a system with many entries, this can take some time so that in the meantime, other threads pass the tests in ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for the lock. They then have to run the garbage collector one after another which blocks them for quite long. Resolve this by replacing special value ~0UL of expire parameter to fib6_run_gc() by explicit "force" parameter to choose between spin_lock_bh() and spin_trylock_bh() and call fib6_run_gc() with force=false if gc_thresh is reached but not max_size. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller Cc: Konstantin Khlebnikov Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_fib.h | 2 +- net/ipv6/ip6_fib.c | 19 ++++++++----------- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 4 ++-- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 665e0cee59bd..5e661a979694 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -301,7 +301,7 @@ extern void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); extern void fib6_run_gc(unsigned long expires, - struct net *net); + struct net *net, bool force); extern void fib6_gc_cleanup(void); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ceeb9458bb60..0b5e9086322d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1648,19 +1648,16 @@ static int fib6_age(struct rt6_info *rt, void *arg) static DEFINE_SPINLOCK(fib6_gc_lock); -void fib6_run_gc(unsigned long expires, struct net *net) +void fib6_run_gc(unsigned long expires, struct net *net, bool force) { - if (expires != ~0UL) { + if (force) { spin_lock_bh(&fib6_gc_lock); - gc_args.timeout = expires ? (int)expires : - net->ipv6.sysctl.ip6_rt_gc_interval; - } else { - if (!spin_trylock_bh(&fib6_gc_lock)) { - mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); - return; - } - gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval; + } else if (!spin_trylock_bh(&fib6_gc_lock)) { + mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ); + return; } + gc_args.timeout = expires ? (int)expires : + net->ipv6.sysctl.ip6_rt_gc_interval; gc_args.more = icmp6_dst_gc(); @@ -1677,7 +1674,7 @@ void fib6_run_gc(unsigned long expires, struct net *net) static void fib6_gc_timer_cb(unsigned long arg) { - fib6_run_gc(0, (struct net *)arg); + fib6_run_gc(0, (struct net *)arg, true); } static int __net_init fib6_net_init(struct net *net) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 05f361338c2e..deedf7ddbc6e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1584,7 +1584,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); idev = in6_dev_get(dev); if (!idev) break; @@ -1594,7 +1594,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); - fib6_run_gc(~0UL, net); + fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d94d224f7e68..bd83c90f970c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1349,7 +1349,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) @@ -2849,7 +2849,7 @@ int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, net = (struct net *)ctl->extra1; delay = net->ipv6.sysctl.flush_delay; proc_dointvec(ctl, write, buffer, lenp, ppos); - fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net); + fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } -- cgit v1.2.3 From c1e1eb159efadf28f0da0de550d696abdb30aefc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Thu, 1 Aug 2013 10:04:24 +0200 Subject: ipv6: update ip6_rt_last_gc every time GC is run commit 49a18d86f66d33a20144ecb5a34bba0d1856b260 upstream. As pointed out by Eric Dumazet, net->ipv6.ip6_rt_last_gc should hold the last time garbage collector was run so that we should update it whenever fib6_run_gc() calls fib6_clean_all(), not only if we got there from ip6_dst_gc(). Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller Cc: Konstantin Khlebnikov Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_fib.c | 6 +++++- net/ipv6/route.c | 4 +--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 0b5e9086322d..46458ee31939 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1650,6 +1650,8 @@ static DEFINE_SPINLOCK(fib6_gc_lock); void fib6_run_gc(unsigned long expires, struct net *net, bool force) { + unsigned long now; + if (force) { spin_lock_bh(&fib6_gc_lock); } else if (!spin_trylock_bh(&fib6_gc_lock)) { @@ -1662,10 +1664,12 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); fib6_clean_all(net, fib6_age, 0, NULL); + now = jiffies; + net->ipv6.ip6_rt_last_gc = now; if (gc_args.more) mod_timer(&net->ipv6.ip6_fib_timer, - round_jiffies(jiffies + round_jiffies(now + net->ipv6.sysctl.ip6_rt_gc_interval)); else del_timer(&net->ipv6.ip6_fib_timer); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index bd83c90f970c..6ebefd46f718 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1334,7 +1334,6 @@ static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), static int ip6_dst_gc(struct dst_ops *ops) { - unsigned long now = jiffies; struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops); int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval; int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size; @@ -1344,13 +1343,12 @@ static int ip6_dst_gc(struct dst_ops *ops) int entries; entries = dst_entries_get_fast(ops); - if (time_after(rt_last_gc + rt_min_interval, now) && + if (time_after(rt_last_gc + rt_min_interval, jiffies) && entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); - net->ipv6.ip6_rt_last_gc = now; entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; -- cgit v1.2.3 From b93e0acc5001b80153cb0c7efcdcc53cc56aac4a Mon Sep 17 00:00:00 2001 From: Jari Ruusu Date: Sat, 13 Jun 2015 19:01:31 +0300 Subject: d_walk() might skip too much When Al Viro's VFS deadlock fix "deal with deadlock in d_walk()" was backported to 3.10.y 3.4.y and 3.2.y stable kernel brances, the deadlock fix was copied to 3 different places. Later, a bug in that code was discovered. Al Viro's fix involved fixing only one part of code in mainline kernel. That fix is called "d_walk() might skip too much". 3.10.y 3.4.y and 3.2.y stable kernel brances need that later fix copied to 3 different places. Greg Kroah-Hartman included Al Viro's "d_walk() might skip too much" fix only once in 3.10.80 kernel, leaving 2 more places without a fix. The patch below was not written by me. I only applied Al Viro's "d_walk() might skip too much" fix 2 more times to 3.10.80 kernel, and cheched that the fixes went to correct places. With this patch applied, all 3 places that I am aware of 3.10.y stable branch are now fixed. Signed-off-by: Jari Ruusu Cc: Willy Tarreau Signed-off-by: Greg Kroah-Hartman --- fs/dcache.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index e2800926ae05..38c4a302fab4 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1053,13 +1053,13 @@ ascend: /* might go back up the wrong parent if we have had a rename. */ if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; - next = child->d_child.next; - while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + /* go into the first sibling still alive */ + do { + next = child->d_child.next; if (next == &this_parent->d_subdirs) goto ascend; child = list_entry(next, struct dentry, d_child); - next = next->next; - } + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); rcu_read_unlock(); goto resume; } @@ -2977,13 +2977,13 @@ ascend: /* might go back up the wrong parent if we have had a rename. */ if (!locked && read_seqretry(&rename_lock, seq)) goto rename_retry; - next = child->d_child.next; - while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + /* go into the first sibling still alive */ + do { + next = child->d_child.next; if (next == &this_parent->d_subdirs) goto ascend; child = list_entry(next, struct dentry, d_child); - next = next->next; - } + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); rcu_read_unlock(); goto resume; } -- cgit v1.2.3 From 4cae4bbc1a8f5258a986801d72678752f9075a64 Mon Sep 17 00:00:00 2001 From: Sebastien Szymanski Date: Wed, 20 May 2015 16:30:37 +0200 Subject: ARM: clk-imx6q: refine sata's parent commit da946aeaeadcd24ff0cda9984c6fb8ed2bfd462a upstream. According to IMX6D/Q RM, table 18-3, sata clock's parent is ahb, not ipg. Signed-off-by: Sebastien Szymanski Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo [dirk.behme: Adjust moved file] Signed-off-by: Dirk Behme Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-imx/clk-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 2acaded8025d..ed00c9e3bfc6 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -515,7 +515,7 @@ int __init mx6q_clocks_init(void) clk[gpmi_io] = imx_clk_gate2("gpmi_io", "enfc", base + 0x78, 28); clk[gpmi_apb] = imx_clk_gate2("gpmi_apb", "usdhc3", base + 0x78, 30); clk[rom] = imx_clk_gate2("rom", "ahb", base + 0x7c, 0); - clk[sata] = imx_clk_gate2("sata", "ipg", base + 0x7c, 4); + clk[sata] = imx_clk_gate2("sata", "ahb", base + 0x7c, 4); clk[sdma] = imx_clk_gate2("sdma", "ahb", base + 0x7c, 6); clk[spba] = imx_clk_gate2("spba", "ipg", base + 0x7c, 12); clk[ssi1_ipg] = imx_clk_gate2("ssi1_ipg", "ipg", base + 0x7c, 18); -- cgit v1.2.3 From ea0d66be1c27f55825a80d01ad1ff72f6e005c29 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Thu, 11 Jun 2015 02:05:33 -0400 Subject: KVM: nSVM: Check for NRIPS support before updating control field commit f104765b4f81fd74d69e0eb161e89096deade2db upstream. If hardware doesn't support DecodeAssist - a feature that provides more information about the intercept in the VMCB, KVM decodes the instruction and then updates the next_rip vmcb control field. However, NRIP support itself depends on cpuid Fn8000_000A_EDX[NRIPS]. Since skip_emulated_instruction() doesn't verify nrip support before accepting control.next_rip as valid, avoid writing this field if support isn't present. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8bf40a243d75..224d2ef754cc 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -495,8 +495,10 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - if (svm->vmcb->control.next_rip != 0) + if (svm->vmcb->control.next_rip != 0) { + WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; + } if (!svm->next_rip) { if (emulate_instruction(vcpu, EMULTYPE_SKIP) != @@ -4229,7 +4231,9 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, break; } - vmcb->control.next_rip = info->next_rip; + /* TODO: Advertise NRIPS to guest hypervisor unconditionally */ + if (static_cpu_has(X86_FEATURE_NRIPS)) + vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; vmexit = nested_svm_exit_handled(svm); -- cgit v1.2.3 From 2667677fb84b4b92e31573f2f2ffaec7a8772747 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 14 Apr 2014 15:47:01 +0200 Subject: bus: mvebu: pass the coherency availability information at init time commit 5686a1e5aa436c49187a60052d5885fb1f541ce6 upstream. Until now, the mvebu-mbus was guessing by itself whether hardware I/O coherency was available or not by poking into the Device Tree to see if the coherency fabric Device Tree node was present or not. However, on some upcoming SoCs, the presence or absence of the coherency fabric DT node isn't sufficient: in CONFIG_SMP, the coherency can be enabled, but not in !CONFIG_SMP. In order to clean this up, the mvebu_mbus_dt_init() function is extended to get a boolean argument telling whether coherency is enabled or not. Therefore, the logic to decide whether coherency is available or not now belongs to the core SoC code instead of the mvebu-mbus driver itself, which is much better. Signed-off-by: Thomas Petazzoni Link: https://lkml.kernel.org/r/1397483228-25625-4-git-send-email-thomas.petazzoni@free-electrons.com Signed-off-by: Jason Cooper [ Greg Ungerer: back ported to linux-3.10.y Back port necessary due to large code differences in affected files. This change in combination with commit e553554536 ("ARM: mvebu: disable I/O coherency on non-SMP situations on Armada 370/375/38x/XP") is critical to the hardware I/O coherency being set correctly by both the mbus driver and all peripheral hardware drivers. Without this change drivers will incorrectly enable I/O coherency window attributes and this causes rare unreliable system behavior including oops. ] Signed-off-by: Greg Ungerer Acked-by: Gregory CLEMENT Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-dove/common.c | 2 +- arch/arm/mach-kirkwood/common.c | 2 +- arch/arm/mach-mv78xx0/common.c | 4 ++-- arch/arm/mach-mvebu/armada-370-xp.c | 3 ++- arch/arm/mach-mvebu/coherency.c | 15 +++++++++++++++ arch/arm/mach-mvebu/coherency.h | 1 + arch/arm/mach-orion5x/common.c | 2 +- drivers/bus/mvebu-mbus.c | 5 ++--- include/linux/mbus.h | 2 +- 9 files changed, 26 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index e2b5da031f96..8d4f5dc56910 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -226,7 +226,7 @@ void __init dove_init_early(void) orion_time_set_base(TIMER_VIRT_BASE); mvebu_mbus_init("marvell,dove-mbus", BRIDGE_WINS_BASE, BRIDGE_WINS_SZ, - DOVE_MC_WINS_BASE, DOVE_MC_WINS_SZ); + DOVE_MC_WINS_BASE, DOVE_MC_WINS_SZ, 0); } static int __init dove_find_tclk(void) diff --git a/arch/arm/mach-kirkwood/common.c b/arch/arm/mach-kirkwood/common.c index f38922897563..4f6831ea88c5 100644 --- a/arch/arm/mach-kirkwood/common.c +++ b/arch/arm/mach-kirkwood/common.c @@ -530,7 +530,7 @@ void __init kirkwood_init_early(void) mvebu_mbus_init("marvell,kirkwood-mbus", BRIDGE_WINS_BASE, BRIDGE_WINS_SZ, - DDR_WINDOW_CPU_BASE, DDR_WINDOW_CPU_SZ); + DDR_WINDOW_CPU_BASE, DDR_WINDOW_CPU_SZ, 0); } int kirkwood_tclk; diff --git a/arch/arm/mach-mv78xx0/common.c b/arch/arm/mach-mv78xx0/common.c index 749a7f8c4992..4722c98dc1bb 100644 --- a/arch/arm/mach-mv78xx0/common.c +++ b/arch/arm/mach-mv78xx0/common.c @@ -337,11 +337,11 @@ void __init mv78xx0_init_early(void) if (mv78xx0_core_index() == 0) mvebu_mbus_init("marvell,mv78xx0-mbus", BRIDGE_WINS_CPU0_BASE, BRIDGE_WINS_SZ, - DDR_WINDOW_CPU0_BASE, DDR_WINDOW_CPU_SZ); + DDR_WINDOW_CPU0_BASE, DDR_WINDOW_CPU_SZ, 0); else mvebu_mbus_init("marvell,mv78xx0-mbus", BRIDGE_WINS_CPU1_BASE, BRIDGE_WINS_SZ, - DDR_WINDOW_CPU1_BASE, DDR_WINDOW_CPU_SZ); + DDR_WINDOW_CPU1_BASE, DDR_WINDOW_CPU_SZ, 0); } void __init_refok mv78xx0_timer_init(void) diff --git a/arch/arm/mach-mvebu/armada-370-xp.c b/arch/arm/mach-mvebu/armada-370-xp.c index 1c48890bb72b..4377c3484a62 100644 --- a/arch/arm/mach-mvebu/armada-370-xp.c +++ b/arch/arm/mach-mvebu/armada-370-xp.c @@ -66,7 +66,8 @@ void __init armada_370_xp_init_early(void) ARMADA_370_XP_MBUS_WINS_BASE, ARMADA_370_XP_MBUS_WINS_SIZE, ARMADA_370_XP_SDRAM_WINS_BASE, - ARMADA_370_XP_SDRAM_WINS_SIZE); + ARMADA_370_XP_SDRAM_WINS_SIZE, + coherency_available()); #ifdef CONFIG_CACHE_L2X0 l2x0_of_init(0, ~0UL); diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 3ee701f1d38e..ea26ebb5bb5a 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -137,6 +137,20 @@ static struct notifier_block mvebu_hwcc_platform_nb = { .notifier_call = mvebu_hwcc_platform_notifier, }; +/* + * Keep track of whether we have IO hardware coherency enabled or not. + * On Armada 370's we will not be using it for example. We need to make + * that available [through coherency_available()] so the mbus controller + * doesn't enable the IO coherency bit in the attribute bits of the + * chip selects. + */ +static int coherency_enabled; + +int coherency_available(void) +{ + return coherency_enabled; +} + int __init coherency_init(void) { struct device_node *np; @@ -170,6 +184,7 @@ int __init coherency_init(void) coherency_base = of_iomap(np, 0); coherency_cpu_base = of_iomap(np, 1); set_cpu_coherent(cpu_logical_map(smp_processor_id()), 0); + coherency_enabled = 1; bus_register_notifier(&platform_bus_type, &mvebu_hwcc_platform_nb); } diff --git a/arch/arm/mach-mvebu/coherency.h b/arch/arm/mach-mvebu/coherency.h index 2f428137f6fe..1501a4e5eea0 100644 --- a/arch/arm/mach-mvebu/coherency.h +++ b/arch/arm/mach-mvebu/coherency.h @@ -19,6 +19,7 @@ int coherency_get_cpu_count(void); #endif int set_cpu_coherent(int cpu_id, int smp_group_id); +int coherency_available(void); int coherency_init(void); #endif /* __MACH_370_XP_COHERENCY_H */ diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c index f8a6db9239bf..048773926ad4 100644 --- a/arch/arm/mach-orion5x/common.c +++ b/arch/arm/mach-orion5x/common.c @@ -213,7 +213,7 @@ void __init orion5x_init_early(void) mbus_soc_name = NULL; mvebu_mbus_init(mbus_soc_name, ORION5X_BRIDGE_WINS_BASE, ORION5X_BRIDGE_WINS_SZ, - ORION5X_DDR_WINS_BASE, ORION5X_DDR_WINS_SZ); + ORION5X_DDR_WINS_BASE, ORION5X_DDR_WINS_SZ, 0); } void orion5x_setup_wins(void) diff --git a/drivers/bus/mvebu-mbus.c b/drivers/bus/mvebu-mbus.c index 711dcf4a0313..7c437826c2f9 100644 --- a/drivers/bus/mvebu-mbus.c +++ b/drivers/bus/mvebu-mbus.c @@ -838,7 +838,7 @@ fs_initcall(mvebu_mbus_debugfs_init); int __init mvebu_mbus_init(const char *soc, phys_addr_t mbuswins_phys_base, size_t mbuswins_size, phys_addr_t sdramwins_phys_base, - size_t sdramwins_size) + size_t sdramwins_size, int is_coherent) { struct mvebu_mbus_state *mbus = &mbus_state; const struct of_device_id *of_id; @@ -865,8 +865,7 @@ int __init mvebu_mbus_init(const char *soc, phys_addr_t mbuswins_phys_base, return -ENOMEM; } - if (of_find_compatible_node(NULL, NULL, "marvell,coherency-fabric")) - mbus->hw_io_coherency = 1; + mbus->hw_io_coherency = is_coherent; for (win = 0; win < mbus->soc->num_wins; win++) mvebu_mbus_disable_window(mbus, win); diff --git a/include/linux/mbus.h b/include/linux/mbus.h index dba482e31a13..e80b9c7ec8da 100644 --- a/include/linux/mbus.h +++ b/include/linux/mbus.h @@ -67,6 +67,6 @@ int mvebu_mbus_add_window(const char *devname, phys_addr_t base, int mvebu_mbus_del_window(phys_addr_t base, size_t size); int mvebu_mbus_init(const char *soc, phys_addr_t mbus_phys_base, size_t mbus_size, phys_addr_t sdram_phys_base, - size_t sdram_size); + size_t sdram_size, int is_coherent); #endif /* __LINUX_MBUS_H */ -- cgit v1.2.3 From c48465423908a8955cd6de09b871161324cf5205 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Jul 2015 19:48:19 -0700 Subject: Linux 3.10.83 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5e3e665a10b7..21529dbcc11d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 10 -SUBLEVEL = 82 +SUBLEVEL = 83 EXTRAVERSION = NAME = TOSSUG Baby Fish -- cgit v1.2.3