From bd23a539d0733c9f9ec3f9fc628491fad2658e82 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Mar 2011 09:56:30 -0400 Subject: fix leaks in path_lookupat() Signed-off-by: Al Viro --- fs/namei.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 5a9a6c3094d..a4dfac650c3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1644,13 +1644,16 @@ static int path_lookupat(int dfd, const char *name, err = -ECHILD; } - if (!err) + if (!err) { err = handle_reval_path(nd); + if (err) + path_put(&nd->path); + } if (!err && nd->flags & LOOKUP_DIRECTORY) { if (!nd->inode->i_op->lookup) { path_put(&nd->path); - return -ENOTDIR; + err = -ENOTDIR; } } -- cgit v1.2.3 From 26ec3c646e75ce7a69fda429d68fcbdcd5eacc62 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 21:24:05 -0500 Subject: make sessionid permissions in /proc/*/task/* match those in /proc/* Signed-off-by: Al Viro --- fs/proc/base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index d49c4b5d2c3..b77236de6d8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3161,7 +3161,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), - REG("sessionid", S_IRUSR, proc_sessionid_operations), + REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), -- cgit v1.2.3 From ca6b0bf0e086513b9ee5efc0aa5770ecb57778af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 22:04:37 -0500 Subject: pagemap: close races with suid execve just use mm_for_maps() Signed-off-by: Al Viro --- fs/proc/base.c | 4 ++-- fs/proc/task_mmu.c | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index b77236de6d8..df73573e12c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2797,7 +2797,7 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_smaps_operations), - REG("pagemap", S_IRUSR, proc_pagemap_operations), + REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), @@ -3133,7 +3133,7 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_smaps_operations), - REG("pagemap", S_IRUSR, proc_pagemap_operations), + REG("pagemap", S_IRUGO, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 60b914860f8..c966413c139 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -729,7 +729,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, goto out; ret = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + mm = mm_for_maps(task); + if (!mm) goto out_task; ret = -EINVAL; @@ -742,10 +743,6 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; - mm = get_task_mm(task); - if (!mm) - goto out_task; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); ret = -ENOMEM; -- cgit v1.2.3 From ec6fd8a4355cda81cd9f06bebc048e83eb514ac7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 22:22:54 -0500 Subject: report errors in /proc/*/*map* sanely Signed-off-by: Al Viro --- fs/proc/base.c | 8 +++++--- fs/proc/task_mmu.c | 10 +++++----- fs/proc/task_nommu.c | 6 +++--- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index df73573e12c..c2828110208 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -225,15 +225,17 @@ static int check_mem_permission(struct task_struct *task) struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; + int err; - if (mutex_lock_killable(&task->signal->cred_guard_mutex)) - return NULL; + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return ERR_PTR(err); mm = get_task_mm(task); if (mm && mm != current->mm && !ptrace_may_access(task, PTRACE_MODE_READ)) { mmput(mm); - mm = NULL; + mm = ERR_PTR(-EACCES); } mutex_unlock(&task->signal->cred_guard_mutex); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c966413c139..8fed0f88fbf 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -119,11 +119,11 @@ static void *m_start(struct seq_file *m, loff_t *pos) priv->task = get_pid_task(priv->pid, PIDTYPE_PID); if (!priv->task) - return NULL; + return ERR_PTR(-ESRCH); mm = mm_for_maps(priv->task); - if (!mm) - return NULL; + if (!mm || IS_ERR(mm)) + return mm; down_read(&mm->mmap_sem); tail_vma = get_gate_vma(priv->task); @@ -728,9 +728,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!task) goto out; - ret = -EACCES; mm = mm_for_maps(task); - if (!mm) + ret = PTR_ERR(mm); + if (!mm || IS_ERR(mm)) goto out_task; ret = -EINVAL; diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index b535d3e5d5f..980de547c07 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -199,13 +199,13 @@ static void *m_start(struct seq_file *m, loff_t *pos) /* pin the task and mm whilst we play with them */ priv->task = get_pid_task(priv->pid, PIDTYPE_PID); if (!priv->task) - return NULL; + return ERR_PTR(-ESRCH); mm = mm_for_maps(priv->task); - if (!mm) { + if (!mm || IS_ERR(mm)) { put_task_struct(priv->task); priv->task = NULL; - return NULL; + return mm; } down_read(&mm->mmap_sem); -- cgit v1.2.3 From d6f64b89d7ff22ce05896ab4a93a653e8d0b123d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 22:26:01 -0500 Subject: close race in /proc/*/environ Switch to mm_for_maps(). Maybe we ought to make it r--r--r--, since we do checks on IO anyway... Signed-off-by: Al Viro --- fs/proc/base.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index c2828110208..fc471b8766d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -919,20 +919,18 @@ static ssize_t environ_read(struct file *file, char __user *buf, if (!task) goto out_no_task; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; - ret = 0; - mm = get_task_mm(task); - if (!mm) + mm = mm_for_maps(task); + ret = PTR_ERR(mm); + if (!mm || IS_ERR(mm)) goto out_free; + ret = 0; while (count > 0) { int this_len, retval, max_len; -- cgit v1.2.3 From 2fadaef41283aad7100fa73f01998cddaca25833 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 15 Feb 2011 22:52:11 -0500 Subject: auxv: require the target to be tracable (or yourself) same as for environ, except that we didn't do any checks to prevent access after suid execve Signed-off-by: Al Viro --- fs/proc/base.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index fc471b8766d..e94b58b496f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -281,9 +281,9 @@ out: static int proc_pid_auxv(struct task_struct *task, char *buffer) { - int res = 0; - struct mm_struct *mm = get_task_mm(task); - if (mm) { + struct mm_struct *mm = mm_for_maps(task); + int res = PTR_ERR(mm); + if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; do { nwords += 2; -- cgit v1.2.3 From c2ef45df3b98a027ec8f9081bd2a19dff520ef9d Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:13 -0400 Subject: x86: add context tag to mark mm when running a task in 32-bit compatibility mode This tag is intended to mirror the thread info TIF_IA32 flag. Will be used to identify mm's which support 32 bit tasks running in compatibility mode without requiring a reference to the task itself. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/x86/include/asm/mmu.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index 80a1dee5bea..aeff3e89b22 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -13,6 +13,12 @@ typedef struct { int size; struct mutex lock; void *vdso; + +#ifdef CONFIG_X86_64 + /* True if mm supports a task running in 32 bit compatibility mode. */ + unsigned short ia32_compat; +#endif + } mm_context_t; #ifdef CONFIG_SMP -- cgit v1.2.3 From 375906f8765e131a4a159b1ffebf78c15db7b3bf Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:14 -0400 Subject: x86: mark associated mm when running a task in 32 bit compatibility mode This patch simply follows the same practice as for setting the TIF_IA32 flag. In particular, an mm is marked as holding 32-bit tasks when a 32-bit binary is exec'ed. Both ELF and a.out formats are updated. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/x86/ia32/ia32_aout.c | 1 + arch/x86/kernel/process_64.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 2d93bdbc9ac..fd843877e84 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -298,6 +298,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) /* OK, This is the point of no return */ set_personality(PER_LINUX); set_thread_flag(TIF_IA32); + current->mm->context.ia32_compat = 1; setup_new_exec(bprm); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index bd387e8f73b..6c9dd922ac0 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -501,6 +501,10 @@ void set_personality_64bit(void) /* Make sure to be in 64bit mode */ clear_thread_flag(TIF_IA32); + /* Ensure the corresponding mm is not marked. */ + if (current->mm) + current->mm->context.ia32_compat = 0; + /* TBD: overwrites user setup. Should have two bits. But 64bit processes have always behaved this way, so it's not too bad. The main problem is just that @@ -516,6 +520,10 @@ void set_personality_ia32(void) set_thread_flag(TIF_IA32); current->personality |= force_personality32; + /* Mark the associated mm as containing 32-bit tasks. */ + if (current->mm) + current->mm->context.ia32_compat = 1; + /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; } -- cgit v1.2.3 From 31db58b3ab432f72ea76be58b12e6ffaf627d5db Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:15 -0400 Subject: mm: arch: make get_gate_vma take an mm_struct instead of a task_struct Morally, the presence of a gate vma is more an attribute of a particular mm than a particular task. Moreover, dropping the dependency on task_struct will help make both existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 6 +++--- arch/x86/vdso/vdso32-setup.c | 11 ++++++----- fs/binfmt_elf.c | 2 +- fs/proc/task_mmu.c | 8 +++++--- include/linux/mm.h | 2 +- mm/memory.c | 4 ++-- mm/mlock.c | 4 ++-- 10 files changed, 23 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index fd8728729ab..6169f175693 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -830,7 +830,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index f438d74dedb..d19f30504c6 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -347,7 +347,7 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 242117cbad6..3f9b6f41813 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -94,7 +94,7 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *task) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { return NULL; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0aa34669ed3..dd4809b5844 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -861,10 +861,10 @@ static struct vm_area_struct gate_vma = { .vm_flags = VM_READ | VM_EXEC }; -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(tsk, TIF_IA32)) + if (!mm || mm->context.ia32_compat) return NULL; #endif return &gate_vma; @@ -872,7 +872,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task); + struct vm_area_struct *vma = get_gate_vma(task->mm); if (!vma) return 0; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 36df991985b..1f651f6bdf6 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -417,11 +417,12 @@ const char *arch_vma_name(struct vm_area_struct *vma) return NULL; } -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { - struct mm_struct *mm = tsk->mm; - - /* Check to see if this task was created in compat vdso mode */ + /* + * Check to see if the corresponding task was created in compat vdso + * mode. + */ if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) return &gate_vma; return NULL; @@ -429,7 +430,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) int in_gate_area(struct task_struct *task, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task); + const struct vm_area_struct *vma = get_gate_vma(task->mm); return vma && addr >= vma->vm_start && addr < vma->vm_end; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index d5b640ba6cb..bbabdcce117 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1906,7 +1906,7 @@ static int elf_core_dump(struct coredump_params *cprm) segs = current->mm->map_count; segs += elf_core_extra_phdrs(); - gate_vma = get_gate_vma(current); + gate_vma = get_gate_vma(current->mm); if (gate_vma != NULL) segs++; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 8fed0f88fbf..e73314afc53 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -126,7 +126,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) return mm; down_read(&mm->mmap_sem); - tail_vma = get_gate_vma(priv->task); + tail_vma = get_gate_vma(priv->task->mm); priv->tail_vma = tail_vma; /* Start with last addr hint */ @@ -277,7 +277,8 @@ static int show_map(struct seq_file *m, void *v) show_map_vma(m, vma); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } @@ -436,7 +437,8 @@ static int show_smap(struct seq_file *m, void *v) (unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0); if (m->count < m->size) /* vma is copied successfully */ - m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + m->version = (vma != get_gate_vma(task->mm)) + ? vma->vm_start : 0; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 581703d86fb..18b4a6358ab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1578,7 +1578,7 @@ static inline bool kernel_page_present(struct page *page) { return true; } #endif /* CONFIG_HIBERNATION */ #endif -extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); +extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA int in_gate_area_no_task(unsigned long addr); int in_gate_area(struct task_struct *task, unsigned long addr); diff --git a/mm/memory.c b/mm/memory.c index e48945ab362..b6dc3709743 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1488,7 +1488,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, vma = find_extend_vma(mm, start); if (!vma && in_gate_area(tsk, start)) { unsigned long pg = start & PAGE_MASK; - struct vm_area_struct *gate_vma = get_gate_vma(tsk); + struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm); pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -3496,7 +3496,7 @@ static int __init gate_vma_init(void) __initcall(gate_vma_init); #endif -struct vm_area_struct *get_gate_vma(struct task_struct *tsk) +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) { #ifdef AT_SYSINFO_EHDR return &gate_vma; diff --git a/mm/mlock.c b/mm/mlock.c index c3924c7f00b..2689a08c79a 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -237,7 +237,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma, if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) || is_vm_hugetlb_page(vma) || - vma == get_gate_vma(current))) { + vma == get_gate_vma(current->mm))) { __mlock_vma_pages_range(vma, start, end, NULL); @@ -332,7 +332,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, int lock = newflags & VM_LOCKED; if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || - is_vm_hugetlb_page(vma) || vma == get_gate_vma(current)) + is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) goto out; /* don't set VM_LOCKED, don't count */ pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); -- cgit v1.2.3 From 83b964bbf82eb13a8f31bb49ca420787fe01f7a6 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:16 -0400 Subject: mm: arch: make in_gate_area take an mm_struct instead of a task_struct Morally, the question of whether an address lies in a gate vma should be asked with respect to an mm, not a particular task. Moreover, dropping the dependency on task_struct will help make existing and future operations on mm's more flexible and convenient. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 4 ++-- arch/x86/vdso/vdso32-setup.c | 4 ++-- include/linux/mm.h | 4 ++-- mm/memory.c | 2 +- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 6169f175693..467aa9ecbf9 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -825,7 +825,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index d19f30504c6..9006e966ef0 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -342,7 +342,7 @@ int in_gate_area_no_task(unsigned long addr) return 0; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { return 0; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 3f9b6f41813..62c36a8961d 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -99,7 +99,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return NULL; } -int in_gate_area(struct task_struct *task, unsigned long address) +int in_gate_area(struct mm_struct *mm, unsigned long address) { return 0; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index dd4809b5844..43c441622c8 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -870,9 +870,9 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return &gate_vma; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - struct vm_area_struct *vma = get_gate_vma(task->mm); + struct vm_area_struct *vma = get_gate_vma(mm); if (!vma) return 0; diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 1f651f6bdf6..f849bb29fda 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -428,9 +428,9 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) return NULL; } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area(struct mm_struct *mm, unsigned long addr) { - const struct vm_area_struct *vma = get_gate_vma(task->mm); + const struct vm_area_struct *vma = get_gate_vma(mm); return vma && addr >= vma->vm_start && addr < vma->vm_end; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 18b4a6358ab..5c6d916cd30 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1581,10 +1581,10 @@ static inline bool kernel_page_present(struct page *page) { return true; } extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA int in_gate_area_no_task(unsigned long addr); -int in_gate_area(struct task_struct *task, unsigned long addr); +int in_gate_area(struct mm_struct *mm, unsigned long addr); #else int in_gate_area_no_task(unsigned long addr); -#define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) +#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ int drop_caches_sysctl_handler(struct ctl_table *, int, diff --git a/mm/memory.c b/mm/memory.c index b6dc3709743..931d479b80c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1486,7 +1486,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct *vma; vma = find_extend_vma(mm, start); - if (!vma && in_gate_area(tsk, start)) { + if (!vma && in_gate_area(tsk->mm, start)) { unsigned long pg = start & PAGE_MASK; struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm); pgd_t *pgd; -- cgit v1.2.3 From cae5d39032acf26c265f6b1dc73d7ce6ff4bc387 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:17 -0400 Subject: mm: arch: rename in_gate_area_no_task to in_gate_area_no_mm Now that gate vma's are referenced with respect to a particular mm and not a particular task it only makes sense to propagate the change to this predicate as well. Signed-off-by: Stephen Wilson Reviewed-by: Michel Lespinasse Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Al Viro --- arch/powerpc/kernel/vdso.c | 2 +- arch/s390/kernel/vdso.c | 2 +- arch/sh/kernel/vsyscall/vsyscall.c | 2 +- arch/x86/mm/init_64.c | 8 ++++---- arch/x86/vdso/vdso32-setup.c | 2 +- include/linux/mm.h | 6 +++--- kernel/kallsyms.c | 4 ++-- mm/memory.c | 2 +- mm/nommu.c | 2 +- 9 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 467aa9ecbf9..142ab1008c3 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -820,7 +820,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 9006e966ef0..d73630b4fe1 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -337,7 +337,7 @@ static int __init vdso_init(void) } arch_initcall(vdso_init); -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c index 62c36a8961d..1d6d51a1ce7 100644 --- a/arch/sh/kernel/vsyscall/vsyscall.c +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -104,7 +104,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long address) return 0; } -int in_gate_area_no_task(unsigned long address) +int in_gate_area_no_mm(unsigned long address) { return 0; } diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 43c441622c8..835393c8554 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -881,11 +881,11 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr) } /* - * Use this when you have no reliable task/vma, typically from interrupt - * context. It is less reliable than using the task's vma and may give - * false positives: + * Use this when you have no reliable mm, typically from interrupt + * context. It is less reliable than using a task's mm and may give + * false positives. */ -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index f849bb29fda..468d591dde3 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c @@ -435,7 +435,7 @@ int in_gate_area(struct mm_struct *mm, unsigned long addr) return vma && addr >= vma->vm_start && addr < vma->vm_end; } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 5c6d916cd30..9d6efefdde5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1580,11 +1580,11 @@ static inline bool kernel_page_present(struct page *page) { return true; } extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm); #ifdef __HAVE_ARCH_GATE_AREA -int in_gate_area_no_task(unsigned long addr); +int in_gate_area_no_mm(unsigned long addr); int in_gate_area(struct mm_struct *mm, unsigned long addr); #else -int in_gate_area_no_task(unsigned long addr); -#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_task(addr);}) +int in_gate_area_no_mm(unsigned long addr); +#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ int drop_caches_sysctl_handler(struct ctl_table *, int, diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 6f6d091b575..b9d0fd1d21c 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr) if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) || arch_is_kernel_text(addr)) return 1; - return in_gate_area_no_task(addr); + return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) return 1; - return in_gate_area_no_task(addr); + return in_gate_area_no_mm(addr); } static int is_ksym_addr(unsigned long addr) diff --git a/mm/memory.c b/mm/memory.c index 931d479b80c..5f5b5de5a40 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3505,7 +3505,7 @@ struct vm_area_struct *get_gate_vma(struct mm_struct *mm) #endif } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { #ifdef AT_SYSINFO_EHDR if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) diff --git a/mm/nommu.c b/mm/nommu.c index f59e1424d3d..e629143f944 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1963,7 +1963,7 @@ error: return -ENOMEM; } -int in_gate_area_no_task(unsigned long addr) +int in_gate_area_no_mm(unsigned long addr) { return 0; } -- cgit v1.2.3 From e7f22e207bacdba5b73f2893a3abe935a5373e2e Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:18 -0400 Subject: mm: use mm_struct to resolve gate vma's in __get_user_pages We now check if a requested user page overlaps a gate vma using the supplied mm instead of the supplied task. The given task is now used solely for accounting purposes and may be NULL. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- mm/memory.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 5f5b5de5a40..5f585b65d73 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1486,9 +1486,9 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct *vma; vma = find_extend_vma(mm, start); - if (!vma && in_gate_area(tsk->mm, start)) { + if (!vma && in_gate_area(mm, start)) { unsigned long pg = start & PAGE_MASK; - struct vm_area_struct *gate_vma = get_gate_vma(tsk->mm); + struct vm_area_struct *gate_vma = get_gate_vma(mm); pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -1589,10 +1589,13 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, return i ? i : -EFAULT; BUG(); } - if (ret & VM_FAULT_MAJOR) - tsk->maj_flt++; - else - tsk->min_flt++; + + if (tsk) { + if (ret & VM_FAULT_MAJOR) + tsk->maj_flt++; + else + tsk->min_flt++; + } if (ret & VM_FAULT_RETRY) { *nonblocking = 0; @@ -1638,7 +1641,8 @@ EXPORT_SYMBOL(__get_user_pages); /** * get_user_pages() - pin user pages in memory - * @tsk: task_struct of target task + * @tsk: the task_struct to use for page fault accounting, or + * NULL if faults are not to be recorded. * @mm: mm_struct of target mm * @start: starting user address * @nr_pages: number of pages from start to pin -- cgit v1.2.3 From 206cb636576b969e9b471cdedeaea7752e6acb33 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:19 -0400 Subject: mm: factor out main logic of access_process_vm Introduce an internal helper __access_remote_vm and base access_process_vm on top of it. This new method may be called with a NULL task_struct if page fault accounting is not desired. This code will be shared with a new address space accessor that is independent of task_struct. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- mm/memory.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 5f585b65d73..820b4c4810f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3650,20 +3650,15 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, #endif /* - * Access another process' address space. - * Source/target buffer must be kernel space, - * Do not walk the page table directly, use get_user_pages + * Access another process' address space as given in mm. If non-NULL, use the + * given task for page fault accounting. */ -int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write) +static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, + unsigned long addr, void *buf, int len, int write) { - struct mm_struct *mm; struct vm_area_struct *vma; void *old_buf = buf; - mm = get_task_mm(tsk); - if (!mm) - return 0; - down_read(&mm->mmap_sem); /* ignore errors, just check how much was successfully transferred */ while (len) { @@ -3712,11 +3707,31 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in addr += bytes; } up_read(&mm->mmap_sem); - mmput(mm); return buf - old_buf; } +/* + * Access another process' address space. + * Source/target buffer must be kernel space, + * Do not walk the page table directly, use get_user_pages + */ +int access_process_vm(struct task_struct *tsk, unsigned long addr, + void *buf, int len, int write) +{ + struct mm_struct *mm; + int ret; + + mm = get_task_mm(tsk); + if (!mm) + return 0; + + ret = __access_remote_vm(tsk, mm, addr, buf, len, write); + mmput(mm); + + return ret; +} + /* * Print the name of a VMA. */ -- cgit v1.2.3 From 5ddd36b9c59887c6416e21daf984fbdd9b1818df Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:20 -0400 Subject: mm: implement access_remote_vm Provide an alternative to access_process_vm that allows the caller to obtain a reference to the supplied mm_struct. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- include/linux/mm.h | 2 ++ mm/memory.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 9d6efefdde5..60011d26bff 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -971,6 +971,8 @@ static inline int handle_mm_fault(struct mm_struct *mm, extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +extern int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, int write); int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int len, unsigned int foll_flags, diff --git a/mm/memory.c b/mm/memory.c index 820b4c4810f..468f5076754 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3711,6 +3711,22 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, return buf - old_buf; } +/** + * @access_remote_vm - access another process' address space + * @mm: the mm_struct of the target address space + * @addr: start address to access + * @buf: source or destination buffer + * @len: number of bytes to transfer + * @write: whether the access is a write + * + * The caller must hold a reference on @mm. + */ +int access_remote_vm(struct mm_struct *mm, unsigned long addr, + void *buf, int len, int write) +{ + return __access_remote_vm(NULL, mm, addr, buf, len, write); +} + /* * Access another process' address space. * Source/target buffer must be kernel space, -- cgit v1.2.3 From 26947f8c8f9598209001cdcd31bb2162a2e54691 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:21 -0400 Subject: proc: disable mem_write after exec This change makes mem_write() observe the same constraints as mem_read(). This is particularly important for mem_write as an accidental leak of the fd across an exec could result in arbitrary modification of the target process' memory. IOW, /proc/pid/mem is implicitly close-on-exec. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- fs/proc/base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/proc/base.c b/fs/proc/base.c index e94b58b496f..9af49a3984f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -850,6 +850,10 @@ static ssize_t mem_write(struct file * file, const char __user *buf, if (check_mem_permission(task)) goto out; + copied = -EIO; + if (file->private_data != (void *)((long)current->self_exec_id)) + goto out; + copied = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) -- cgit v1.2.3 From 18f661bcf898742212182d75f22f05b048cc04bb Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:22 -0400 Subject: proc: hold cred_guard_mutex in check_mem_permission() Avoid a potential race when task exec's and we get a new ->mm but check against the old credentials in ptrace_may_access(). Holding of the mutex is implemented by factoring out the body of the code into a helper function __check_mem_permission(). Performing this factorization now simplifies upcoming changes and minimizes churn in the diff's. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- fs/proc/base.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 9af49a3984f..013f116b322 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -191,10 +191,7 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -/* - * Return zero if current may access user memory in @task, -error if not. - */ -static int check_mem_permission(struct task_struct *task) +static int __check_mem_permission(struct task_struct *task) { /* * A task can always look at itself, in case it chooses @@ -222,6 +219,27 @@ static int check_mem_permission(struct task_struct *task) return -EPERM; } +/* + * Return zero if current may access user memory in @task, -error if not. + */ +static int check_mem_permission(struct task_struct *task) +{ + int err; + + /* + * Avoid racing if task exec's as we might get a new mm but validate + * against old credentials. + */ + err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return err; + + err = __check_mem_permission(task); + mutex_unlock(&task->signal->cred_guard_mutex); + + return err; +} + struct mm_struct *mm_for_maps(struct task_struct *task) { struct mm_struct *mm; -- cgit v1.2.3 From 8b0db9db19858b08c46a84540acfd35f6e6487b8 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:23 -0400 Subject: proc: make check_mem_permission() return an mm_struct on success This change allows us to take advantage of access_remote_vm(), which in turn eliminates a security issue with the mem_write() implementation. The previous implementation of mem_write() was insecure since the target task could exec a setuid-root binary between the permission check and the actual write. Holding a reference to the target mm_struct eliminates this vulnerability. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- fs/proc/base.c | 58 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 013f116b322..e34c3c33b2d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -191,14 +191,20 @@ static int proc_root_link(struct inode *inode, struct path *path) return result; } -static int __check_mem_permission(struct task_struct *task) +static struct mm_struct *__check_mem_permission(struct task_struct *task) { + struct mm_struct *mm; + + mm = get_task_mm(task); + if (!mm) + return ERR_PTR(-EINVAL); + /* * A task can always look at itself, in case it chooses * to use system calls instead of load instructions. */ if (task == current) - return 0; + return mm; /* * If current is actively ptrace'ing, and would also be @@ -210,20 +216,23 @@ static int __check_mem_permission(struct task_struct *task) match = (tracehook_tracer_task(task) == current); rcu_read_unlock(); if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) - return 0; + return mm; } /* * Noone else is allowed. */ - return -EPERM; + mmput(mm); + return ERR_PTR(-EPERM); } /* - * Return zero if current may access user memory in @task, -error if not. + * If current may access user memory in @task return a reference to the + * corresponding mm, otherwise ERR_PTR. */ -static int check_mem_permission(struct task_struct *task) +static struct mm_struct *check_mem_permission(struct task_struct *task) { + struct mm_struct *mm; int err; /* @@ -232,12 +241,12 @@ static int check_mem_permission(struct task_struct *task) */ err = mutex_lock_killable(&task->signal->cred_guard_mutex); if (err) - return err; + return ERR_PTR(err); - err = __check_mem_permission(task); + mm = __check_mem_permission(task); mutex_unlock(&task->signal->cred_guard_mutex); - return err; + return mm; } struct mm_struct *mm_for_maps(struct task_struct *task) @@ -795,18 +804,14 @@ static ssize_t mem_read(struct file * file, char __user * buf, if (!task) goto out_no_task; - if (check_mem_permission(task)) - goto out; - ret = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) goto out; - ret = 0; - - mm = get_task_mm(task); - if (!mm) + mm = check_mem_permission(task); + ret = PTR_ERR(mm); + if (IS_ERR(mm)) goto out_free; ret = -EIO; @@ -820,8 +825,8 @@ static ssize_t mem_read(struct file * file, char __user * buf, int this_len, retval; this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; - retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || check_mem_permission(task)) { + retval = access_remote_vm(mm, src, page, this_len, 0); + if (!retval) { if (!ret) ret = -EIO; break; @@ -860,22 +865,25 @@ static ssize_t mem_write(struct file * file, const char __user *buf, char *page; struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); unsigned long dst = *ppos; + struct mm_struct *mm; copied = -ESRCH; if (!task) goto out_no_task; - if (check_mem_permission(task)) - goto out; + mm = check_mem_permission(task); + copied = PTR_ERR(mm); + if (IS_ERR(mm)) + goto out_task; copied = -EIO; if (file->private_data != (void *)((long)current->self_exec_id)) - goto out; + goto out_mm; copied = -ENOMEM; page = (char *)__get_free_page(GFP_TEMPORARY); if (!page) - goto out; + goto out_mm; copied = 0; while (count > 0) { @@ -886,7 +894,7 @@ static ssize_t mem_write(struct file * file, const char __user *buf, copied = -EFAULT; break; } - retval = access_process_vm(task, dst, page, this_len, 1); + retval = access_remote_vm(mm, dst, page, this_len, 1); if (!retval) { if (!copied) copied = -EIO; @@ -899,7 +907,9 @@ static ssize_t mem_write(struct file * file, const char __user *buf, } *ppos = dst; free_page((unsigned long) page); -out: +out_mm: + mmput(mm); +out_task: put_task_struct(task); out_no_task: return copied; -- cgit v1.2.3 From 198214a7ee50375fa71a65e518341980cfd4b2f0 Mon Sep 17 00:00:00 2001 From: Stephen Wilson Date: Sun, 13 Mar 2011 15:49:24 -0400 Subject: proc: enable writing to /proc/pid/mem With recent changes there is no longer a security hazard with writing to /proc/pid/mem. Remove the #ifdef. Signed-off-by: Stephen Wilson Signed-off-by: Al Viro --- fs/proc/base.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index e34c3c33b2d..bc15df390ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -854,10 +854,6 @@ out_no_task: return ret; } -#define mem_write NULL - -#ifndef mem_write -/* This is a security hazard */ static ssize_t mem_write(struct file * file, const char __user *buf, size_t count, loff_t *ppos) { @@ -914,7 +910,6 @@ out_task: out_no_task: return copied; } -#endif loff_t mem_lseek(struct file *file, loff_t offset, int orig) { -- cgit v1.2.3 From a9712bc12c40c172e393f85a9b2ba8db4bf59509 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 23 Mar 2011 15:52:50 -0400 Subject: deal with races in /proc/*/{syscall,stack,personality} All of those are rw-r--r-- and all are broken for suid - if you open a file before the target does suid-root exec, you'll be still able to access it. For personality it's not a big deal, but for syscall and stack it's a real problem. Fix: check that task is tracable for you at the time of read(). Signed-off-by: Al Viro --- fs/proc/base.c | 69 ++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 19 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index bc15df390ec..7d5bb8b9a4f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -347,6 +347,23 @@ static int proc_pid_wchan(struct task_struct *task, char *buffer) } #endif /* CONFIG_KALLSYMS */ +static int lock_trace(struct task_struct *task) +{ + int err = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (err) + return err; + if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + mutex_unlock(&task->signal->cred_guard_mutex); + return -EPERM; + } + return 0; +} + +static void unlock_trace(struct task_struct *task) +{ + mutex_unlock(&task->signal->cred_guard_mutex); +} + #ifdef CONFIG_STACKTRACE #define MAX_STACK_TRACE_DEPTH 64 @@ -356,6 +373,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, { struct stack_trace trace; unsigned long *entries; + int err; int i; entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL); @@ -366,15 +384,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, trace.max_entries = MAX_STACK_TRACE_DEPTH; trace.entries = entries; trace.skip = 0; - save_stack_trace_tsk(task, &trace); - for (i = 0; i < trace.nr_entries; i++) { - seq_printf(m, "[<%p>] %pS\n", - (void *)entries[i], (void *)entries[i]); + err = lock_trace(task); + if (!err) { + save_stack_trace_tsk(task, &trace); + + for (i = 0; i < trace.nr_entries; i++) { + seq_printf(m, "[<%p>] %pS\n", + (void *)entries[i], (void *)entries[i]); + } + unlock_trace(task); } kfree(entries); - return 0; + return err; } #endif @@ -537,18 +560,22 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer) { long nr; unsigned long args[6], sp, pc; + int res = lock_trace(task); + if (res) + return res; if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) - return sprintf(buffer, "running\n"); - - if (nr < 0) - return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); - - return sprintf(buffer, + res = sprintf(buffer, "running\n"); + else if (nr < 0) + res = sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + else + res = sprintf(buffer, "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", nr, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc); + unlock_trace(task); + return res; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ @@ -2775,8 +2802,12 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { - seq_printf(m, "%08x\n", task->personality); - return 0; + int err = lock_trace(task); + if (!err) { + seq_printf(m, "%08x\n", task->personality); + unlock_trace(task); + } + return err; } /* @@ -2795,7 +2826,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUSR, proc_pid_personality), + ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), @@ -2805,7 +2836,7 @@ static const struct pid_entry tgid_base_stuff[] = { #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUSR, proc_pid_syscall), + INF("syscall", S_IRUGO, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tgid_stat), @@ -2833,7 +2864,7 @@ static const struct pid_entry tgid_base_stuff[] = { INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUSR, proc_pid_stack), + ONE("stack", S_IRUGO, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), @@ -3135,14 +3166,14 @@ static const struct pid_entry tid_base_stuff[] = { REG("environ", S_IRUSR, proc_environ_operations), INF("auxv", S_IRUSR, proc_pid_auxv), ONE("status", S_IRUGO, proc_pid_status), - ONE("personality", S_IRUSR, proc_pid_personality), + ONE("personality", S_IRUGO, proc_pid_personality), INF("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK - INF("syscall", S_IRUSR, proc_pid_syscall), + INF("syscall", S_IRUGO, proc_pid_syscall), #endif INF("cmdline", S_IRUGO, proc_pid_cmdline), ONE("stat", S_IRUGO, proc_tid_stat), @@ -3169,7 +3200,7 @@ static const struct pid_entry tid_base_stuff[] = { INF("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE - ONE("stack", S_IRUSR, proc_pid_stack), + ONE("stack", S_IRUGO, proc_pid_stack), #endif #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, proc_pid_schedstat), -- cgit v1.2.3