From d6ad3e286d2c075a60b9f11075a2c55aeeeca2ad Mon Sep 17 00:00:00 2001
From: Jason Wessel
Date: Wed, 27 Jan 2010 16:25:22 -0600
Subject: softlockup: Add sched_clock_tick() to avoid kernel warning on kgdb resume

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, sched_clock() gets the
time from hardware such as the TSC on x86.  In this configuration kgdb
will report a softlockup warning message on resuming or detaching from
a debug session.

Sequence of events in the problem case:

 1) "cpu sched clock" and "hardware time" are at 100 sec prior to a
    call to kgdb_handle_exception()

 2) Debugger waits in kgdb_handle_exception() for 80 sec and on exit
    the following is called ...  touch_softlockup_watchdog() -->
    __raw_get_cpu_var(touch_timestamp) = 0;

 3) "cpu sched clock" = 100s (it was not updated, because the interrupt
    was disabled in kgdb) but the "hardware time" = 180 sec

 4) The first timer interrupt after resuming from kgdb_handle_exception
    updates the watchdog from the "cpu sched clock"

    update_process_times() {
     ...
     run_local_timers() --> softlockup_tick() --> check (touch_timestamp == 0)
        (it is "YES" here, we have set "touch_timestamp = 0" at kgdb) -->
        __touch_softlockup_watchdog()
     ***(A)--> reset "touch_timestamp" to "get_timestamp()"
               (Here, "touch_timestamp" will still be set to 100s.)
     ...
     scheduler_tick()
     ***(B)--> sched_clock_tick()
               (update "cpu sched clock" to "hardware time" = 180s)
     ...
    }

 5) The second timer interrupt handler appears to have a large jump and
    trips the softlockup warning.

    update_process_times() {
     ...
     run_local_timers() --> softlockup_tick() -->
        "cpu sched clock" - "touch_timestamp" = 180s - 100s > 60s -->
        printk "soft lockup error messages"
     ...
    }

note: ***(A) reset "touch_timestamp" to "get_timestamp(this_cpu)"

Why is "touch_timestamp" 100 sec, instead of 180 sec?

When CONFIG_HAVE_UNSTABLE_SCHED_CLOCK is set, the call trace of
get_timestamp() is:

 get_timestamp(this_cpu)
  --> cpu_clock(this_cpu)
   --> sched_clock_cpu(this_cpu)
    --> __update_sched_clock(sched_clock_data, now)

The __update_sched_clock() function uses the GTOD tick value to create
a window to normalize the "now" values.  So if the "now" value is too
big for sched_clock_data, it will be ignored.

The fix is to invoke sched_clock_tick() to update "cpu sched clock" in
order to recover from this state.  This is done by introducing the
function touch_softlockup_watchdog_sync().  This allows kgdb to request
that the sched clock is updated when the watchdog thread runs the first
time after a resume from kgdb.
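For illustration only, a minimal sketch of how the new helper and its
consumer in softlockup_tick() could fit together, assuming a per-cpu
"sync" flag; the identifier names and exact placement are editorial
assumptions, not necessarily the actual kernel/softlockup.c change:

    /* Hedged sketch -- flag name and placement are assumptions. */
    static DEFINE_PER_CPU(bool, softlock_touch_sync);

    void touch_softlockup_watchdog_sync(void)
    {
            /* Ask the next softlockup_tick() to resync the sched clock. */
            __raw_get_cpu_var(softlock_touch_sync) = true;
            __raw_get_cpu_var(touch_timestamp) = 0;
    }

    /* ...and in softlockup_tick(), before re-arming the timestamp: */
    if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
            per_cpu(softlock_touch_sync, this_cpu) = false;
            sched_clock_tick();  /* pull "cpu sched clock" up to hardware time */
    }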
[yong.zhang0@gmail.com: Use per cpu instead of an array]
Signed-off-by: Jason Wessel
Signed-off-by: Dongdong Deng
Cc: kgdb-bugreport@lists.sourceforge.net
Cc: peterz@infradead.org
LKML-Reference: <1264631124-4837-2-git-send-email-jason.wessel@windriver.com>
Signed-off-by: Ingo Molnar
---
 include/linux/sched.h | 4 ++++
 1 file changed, 4 insertions(+)
(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7bba93929..89232151a9d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -310,6 +310,7 @@ extern void sched_show_task(struct task_struct *p);
 #ifdef CONFIG_DETECT_SOFTLOCKUP
 extern void softlockup_tick(void);
 extern void touch_softlockup_watchdog(void);
+extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
 extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
 				    void __user *buffer,
@@ -323,6 +324,9 @@ static inline void softlockup_tick(void)
 static inline void touch_softlockup_watchdog(void)
 {
 }
+static inline void touch_softlockup_watchdog_sync(void)
+{
+}
 static inline void touch_all_softlockup_watchdogs(void)
 {
 }
--
cgit v1.2.3


From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001
From: Evgeniy Polyakov
Date: Tue, 2 Feb 2010 15:58:48 -0800
Subject: connector: Delete buggy notification code.

On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote:
> > There are at least two ways to fix it: using a big cannon and a small
> > one. The former way is to disable notification registration, since it
> > is not used by anyone at all. The second way is to check whether the
> > calling process is root and its destination group is -1 (kind of
> > privileged one) before the command is dispatched to the workqueue.
>
> Well if no one is using it, removing it makes the most sense, right?
>
> No objection from me, care to make up a patch either way for this?

Given that it is not used, let's drop support for notifications about
(un)registered events from connector.

Another option was to check credentials on receiving, but we can always
restore that later without bugs if needed; besides, genetlink has a
wider code base and nobody complained that userspace cannot get a
notification when some other clients were (un)registered.

Kudos to Sebastian Krahmer, who found a bug in the code.

Signed-off-by: Evgeniy Polyakov
Acked-by: Greg Kroah-Hartman
Signed-off-by: David S. Miller
---
 include/linux/connector.h | 32 --------------------------------
 1 file changed, 32 deletions(-)
(limited to 'include')

diff --git a/include/linux/connector.h b/include/linux/connector.h
index 72ba63eb83c..3a779ffba60 100644
--- a/include/linux/connector.h
+++ b/include/linux/connector.h
@@ -24,9 +24,6 @@
 #include
 
-#define CN_IDX_CONNECTOR		0xffffffff
-#define CN_VAL_CONNECTOR		0xffffffff
-
 /*
  * Process Events connector unique ids -- used for message routing
  */
@@ -75,30 +72,6 @@ struct cn_msg {
 	__u8 data[0];
 };
 
-/*
- * Notify structure - requests notification about
- * registering/unregistering idx/val in range [first, first+range].
- */
-struct cn_notify_req {
-	__u32 first;
-	__u32 range;
-};
-
-/*
- * Main notification control message
- * *_notify_num	- number of appropriate cn_notify_req structures after
- *		  this struct.
- * group	- notification receiver's idx.
- * len		- total length of the attached data.
- */
-struct cn_ctl_msg {
-	__u32 idx_notify_num;
-	__u32 val_notify_num;
-	__u32 group;
-	__u32 len;
-	__u8 data[0];
-};
-
 #ifdef __KERNEL__
 
 #include
@@ -151,11 +124,6 @@ struct cn_callback_entry {
 	u32 seq, group;
 };
 
-struct cn_ctl_entry {
-	struct list_head notify_entry;
-	struct cn_ctl_msg *msg;
-};
-
 struct cn_dev {
 	struct cb_id id;
--
cgit v1.2.3


From cd757645fbdc34a8343c04bb0e74e06fccc2cb10 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar
Date: Sat, 30 Jan 2010 10:25:18 +0530
Subject: perf: Make bp_len type to u64 generic across the arch

Change the 'bp_len' type to __u64 to make it work across archs, as the
s390 architecture watchpoint length can be up to 2^64.

reference: http://lkml.org/lkml/2010/1/25/212

This is an ABI change that is not backward compatible with the previous
hardware breakpoint info layout integrated in this development cycle.
A rebuild of perf tools is necessary for versions based on
2.6.33-rc1 - 2.6.33-rc6 to work with a kernel based on this patch.

Signed-off-by: Mahesh Salgaonkar
Acked-by: Peter Zijlstra
Cc: Ananth N Mavinakayanahalli
Cc: "K. Prasad"
Cc: Maneesh Soni
Cc: Heiko Carstens
Cc: Martin
LKML-Reference: <20100130045518.GA20776@in.ibm.com>
Signed-off-by: Frederic Weisbecker
---
 include/linux/hw_breakpoint.h | 2 +-
 include/linux/perf_event.h    | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)
(limited to 'include')

diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 070ba062173..5977b724f7c 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -44,7 +44,7 @@ static inline int hw_breakpoint_type(struct perf_event *bp)
 	return bp->attr.bp_type;
 }
 
-static inline int hw_breakpoint_len(struct perf_event *bp)
+static inline unsigned long hw_breakpoint_len(struct perf_event *bp)
 {
 	return bp->attr.bp_len;
 }

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8fa71874113..a177698d95e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -211,11 +211,9 @@ struct perf_event_attr {
 		__u32		wakeup_watermark; /* bytes before wakeup   */
 	};
 
-	__u32			__reserved_2;
-
-	__u64			bp_addr;
 	__u32			bp_type;
-	__u32			bp_len;
+	__u64			bp_addr;
+	__u64			bp_len;
 };
 
 /*
--
cgit v1.2.3


From f7acede65d6b65919aee5b6a360a17cedb11f2f7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 28 Jan 2010 13:30:11 +0100
Subject: libata: fix ata_id_logical_per_physical_sectors

The value we get from the low byte of the ATA_ID_SECTOR_SIZE word is
not a plain multiple, but the log of it, so fix the helper to give the
correct answer.  Without this we'll get an incorrect minimal I/O size
in the block limits VPD page for 4k sector drives.

Also change the return value of ata_id_logical_per_physical_sectors to
u16 for the unlikely case of very large logical sectors.
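For illustration, a hedged example of what the corrected helper computes
for a typical 512e drive (4K physical, 512-byte logical sectors); the
sample word value is made up and not taken from the patch:

    /* Bits 3:0 of the ATA_ID_SECTOR_SIZE word hold log2 of the ratio. */
    u16 word = id[ATA_ID_SECTOR_SIZE];  /* e.g. 0x6003 on a 512e drive */
    u16 lpps = 1 << (word & 0xf);       /* 1 << 3 == 8 logical per physical */
    /* The old helper returned 3 here, giving a bogus minimal I/O size. */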
Signed-off-by: Christoph Hellwig
Signed-off-by: Jeff Garzik
---
 include/linux/ata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/include/linux/ata.h b/include/linux/ata.h
index 38a6948ce0c..20f31567cce 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -647,9 +647,9 @@ static inline int ata_id_has_large_logical_sectors(const u16 *id)
 	return id[ATA_ID_SECTOR_SIZE] & (1 << 13);
 }
 
-static inline u8 ata_id_logical_per_physical_sectors(const u16 *id)
+static inline u16 ata_id_logical_per_physical_sectors(const u16 *id)
 {
-	return id[ATA_ID_SECTOR_SIZE] & 0xf;
+	return 1 << (id[ATA_ID_SECTOR_SIZE] & 0xf);
 }
 
 static inline int ata_id_has_lba48(const u16 *id)
--
cgit v1.2.3


From 2938429501b73f6aeb312236eac7ed0416a07cd5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell
Date: Fri, 5 Feb 2010 16:09:11 +1100
Subject: percpu: add __percpu for sparse

This is to make the annotation of percpu variables during the next
merge window less painful.

Extracted from a patch by Rusty Russell.

Signed-off-by: Stephen Rothwell
Acked-by: Tejun Heo
Signed-off-by: Linus Torvalds
---
 include/linux/compiler.h | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'include')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 5be3dab4a69..188fcae10a9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,6 +15,7 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
+# define __percpu	__attribute__((noderef, address_space(3)))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
 #else
@@ -32,6 +33,7 @@ extern void __chk_io_ptr(const volatile void __iomem *);
 # define __acquire(x) (void)0
 # define __release(x) (void)0
 # define __cond_lock(x,c) (c)
+# define __percpu
 #endif
 
 #ifdef __KERNEL__
--
cgit v1.2.3


From 8eb988c70e7709b7bd1a69f0ec53d19ac20dea84 Mon Sep 17 00:00:00 2001
From: Mimi Zohar
Date: Wed, 20 Jan 2010 15:35:41 -0500
Subject: fix ima breakage

The "Untangling ima mess, part 2 with counters" patch messed up the
counters.  Based on conversations with Al Viro, this patch streamlines
ima_path_check() by removing the counter maintenance.  The counters are
now updated independently of measuring the file, in __dentry_open() and
alloc_file(), by calling ima_counts_get().  ima_path_check() is called
from nfsd and do_filp_open().

It also did not measure all files that should have been measured.
Reason: ima_path_check() got a bogus value passed as mask.
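As a hedged sketch of how the reworked hook is meant to be used by its
callers (the variable names are illustrative; this is not one of the
actual fs/ hunks from the series):

    /* The open paths keep the counters themselves... */
    ima_counts_get(filp);               /* __dentry_open() / alloc_file() */

    /* ...while do_filp_open() and nfsd pass the open file plus the real
     * access mask to the measurement hook, instead of a path and a bogus
     * mask. */
    error = ima_path_check(filp, acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));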
[AV: mea culpa]
[AV: add missing nfsd bits]
Signed-off-by: Mimi Zohar
Signed-off-by: Al Viro
---
 include/linux/ima.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index 99dc6d5cf7e..aa55a8f1f5b 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct path *path, int mask);
+extern int ima_path_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct path *path, int mask)
+static inline int ima_path_check(struct file *file, int mask)
 {
 	return 0;
 }
--
cgit v1.2.3


From 9bbb6cad0173e6220f3ac609e26beb48dab3b7cd Mon Sep 17 00:00:00 2001
From: Mimi Zohar
Date: Tue, 26 Jan 2010 17:02:40 -0500
Subject: ima: rename ima_path_check to ima_file_check

ima_path_check() actually deals with files!  Call it ima_file_check()
instead.

Signed-off-by: Eric Paris
Acked-by: Mimi Zohar
Signed-off-by: Al Viro
---
 include/linux/ima.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
(limited to 'include')

diff --git a/include/linux/ima.h b/include/linux/ima.h
index aa55a8f1f5b..975837e7d6c 100644
--- a/include/linux/ima.h
+++ b/include/linux/ima.h
@@ -17,7 +17,7 @@ struct linux_binprm;
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_inode_alloc(struct inode *inode);
 extern void ima_inode_free(struct inode *inode);
-extern int ima_path_check(struct file *file, int mask);
+extern int ima_file_check(struct file *file, int mask);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern void ima_counts_get(struct file *file);
@@ -38,7 +38,7 @@ static inline void ima_inode_free(struct inode *inode)
 	return;
 }
 
-static inline int ima_path_check(struct file *file, int mask)
+static inline int ima_file_check(struct file *file, int mask)
 {
 	return 0;
 }
--
cgit v1.2.3


From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 8 Feb 2010 11:16:56 -0800
Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep

nf_conntrack_cachep is currently shared by all netns instances, but
because of SLAB_DESTROY_BY_RCU special semantics, this is wrong.

If we use a shared slab cache, one object can instantly fly from one
hash table (netns ONE) to another one (netns TWO), and a concurrent
reader (doing a lookup in netns ONE, 'finding' an object of netns TWO)
can be fooled without notice, because no RCU grace period has to be
observed between object freeing and its reuse.

We don't have this problem with UDP/TCP slab caches because TCP/UDP
hashtables are global to the machine (and each object has a pointer to
its netns).

If we use per netns conntrack hash tables, we also *must* use per netns
conntrack slab caches, to guarantee an object can not escape from one
namespace to another one.
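A hedged sketch of what per-namespace cache creation could look like on
top of the new fields (the function name and the "%p"-based slab name are
assumptions for illustration, not the actual nf_conntrack_core.c hunk):

    static int example_conntrack_init_net(struct net *net)
    {
            /* Each namespace gets its own slab name... */
            net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
            if (!net->ct.slabname)
                    return -ENOMEM;

            /* ...and its own SLAB_DESTROY_BY_RCU cache, so a freed conntrack
             * can only be reused inside the same namespace's hash table. */
            net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
                                            sizeof(struct nf_conn), 0,
                                            SLAB_DESTROY_BY_RCU, NULL);
            if (!net->ct.nf_conntrack_cachep) {
                    kfree(net->ct.slabname);
                    return -ENOMEM;
            }
            return 0;
    }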
Signed-off-by: Eric Dumazet
[Patrick: added unique slab name allocation]
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy
---
 include/net/netns/conntrack.h | 2 ++
 1 file changed, 2 insertions(+)
(limited to 'include')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index ba1ba0c5efd..aed23b6c847 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
@@ -28,5 +29,6 @@ struct netns_ct {
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
+	char			*slabname;
 };
 #endif
--
cgit v1.2.3


From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001
From: Patrick McHardy
Date: Mon, 8 Feb 2010 11:18:07 -0800
Subject: netfilter: nf_conntrack: fix hash resizing with namespaces

As noticed by Jon Masters, the conntrack hash size is global and not per
namespace, but modifiable at runtime through
/sys/module/nf_conntrack/hashsize.  Changing the hash size will only
resize the hash in the current namespace however, so other namespaces
will use an invalid hash size.  This can cause crashes when enlarging
the hashsize, or false negative lookups when shrinking it.

Move the hash size into the per-namespace data and only use the global
hash size to initialize the per-namespace value when instantiating a new
namespace.  Additionally restrict hash resizing to init_net for now, as
other namespaces are not handled currently.

Cc: stable@kernel.org
Signed-off-by: Patrick McHardy
Signed-off-by: David S. Miller
---
 include/net/netns/conntrack.h | 1 +
 include/net/netns/ipv4.h      | 1 +
 2 files changed, 2 insertions(+)
(limited to 'include')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index aed23b6c847..63d449807d9 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	unsigned int		htable_size;
 	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2eb3814d625..9a4b8b71407 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,6 +40,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_security;
 	struct xt_table		*nat_table;
 	struct hlist_head	*nat_bysource;
+	unsigned int		nat_htable_size;
 	int			nat_vmalloced;
 #endif
--
cgit v1.2.3


From 69c9700b544e496dc3ccf472a4f3a76dcf4abaf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?=
Date: Tue, 26 Jan 2010 18:39:20 +0000
Subject: drm/nouveau: Add getparam to get available PGRAPH units.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On nv50, this will be needed by applications using CUDA to know how much
stack/local memory to allocate.
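A hedged user-space sketch of how a client could query the new parameter;
the drmCommandWriteRead() call is the usual libdrm pattern and is assumed
here rather than taken from this patch, and the encoding of the returned
value is driver-defined:

    #include <stdint.h>
    #include <xf86drm.h>
    #include "nouveau_drm.h"

    static int query_graph_units(int fd, uint64_t *units)
    {
            struct drm_nouveau_getparam gp = {
                    .param = NOUVEAU_GETPARAM_GRAPH_UNITS,
            };
            int ret = drmCommandWriteRead(fd, DRM_NOUVEAU_GETPARAM,
                                          &gp, sizeof(gp));

            if (ret)
                    return ret;
            *units = gp.value;  /* driver-defined encoding of the PGRAPH units */
            return 0;
    }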
Signed-off-by: Marcin Kościelnicki
Signed-off-by: Ben Skeggs
---
 include/drm/nouveau_drm.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include')

diff --git a/include/drm/nouveau_drm.h b/include/drm/nouveau_drm.h
index 1e67c441ea8..f745948b61e 100644
--- a/include/drm/nouveau_drm.h
+++ b/include/drm/nouveau_drm.h
@@ -77,6 +77,7 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_GETPARAM_PCI_PHYSICAL	10
 #define NOUVEAU_GETPARAM_CHIPSET_ID	11
 #define NOUVEAU_GETPARAM_VM_VRAM_BASE	12
+#define NOUVEAU_GETPARAM_GRAPH_UNITS	13
 struct drm_nouveau_getparam {
 	uint64_t param;
 	uint64_t value;
--
cgit v1.2.3


From f77cef3db357aeea22d82a2aa4f0ef8fbae41d47 Mon Sep 17 00:00:00 2001
From: Thomas Hellstrom
Date: Tue, 9 Feb 2010 19:41:55 +0000
Subject: drm/vmwgfx: Update the user-space interface.

When time-based throttling is implemented, we need to bump minor.
When the old way of detecting scanout is removed, we need to bump major.
In the meantime, this change should not break existing user-space.

Signed-off-by: Thomas Hellstrom
Signed-off-by: Jakob Bornecrantz
Signed-off-by: Dave Airlie
---
 include/drm/vmwgfx_drm.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)
(limited to 'include')

diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index 2be7e1249b6..dfaf3c2d2c8 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -68,7 +68,8 @@
 #define DRM_VMW_PARAM_NUM_FREE_STREAMS 1
 #define DRM_VMW_PARAM_3D               2
 #define DRM_VMW_PARAM_FIFO_OFFSET      3
-
+#define DRM_VMW_PARAM_HW_CAPS          4
+#define DRM_VMW_PARAM_FIFO_CAPS        5
 
 /**
  * struct drm_vmw_getparam_arg
@@ -181,6 +182,8 @@ struct drm_vmw_context_arg {
  * The size of the array should equal the total number of mipmap levels.
  * @shareable: Boolean whether other clients (as identified by file descriptors)
  * may reference this surface.
+ * @scanout: Boolean whether the surface is intended to be used as a
+ * scanout.
  *
  * Input data to the DRM_VMW_CREATE_SURFACE Ioctl.
  * Output data from the DRM_VMW_REF_SURFACE Ioctl.
@@ -192,7 +195,7 @@ struct drm_vmw_surface_create_req {
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
 	uint64_t size_addr;
 	int32_t shareable;
-	uint32_t pad64;
+	int32_t scanout;
 };
 
 /**
@@ -295,6 +298,9 @@ union drm_vmw_surface_reference_arg {
  *
 * @commands: User-space address of a command buffer cast to an uint64_t.
 * @command-size: Size in bytes of the command buffer.
+ * @throttle-us: Sleep until software is less than @throttle_us
+ * microseconds ahead of hardware. The driver may round this value
+ * to the nearest kernel tick.
 * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
 * uint64_t.
 *
@@ -304,7 +310,7 @@ union drm_vmw_surface_reference_arg {
 struct drm_vmw_execbuf_arg {
	uint64_t commands;
	uint32_t command_size;
-	uint32_t pad64;
+	uint32_t throttle_us;
	uint64_t fence_rep;
 };
--
cgit v1.2.3


From a87897edbae2d60db7bcb6bb0a75e82013d68305 Mon Sep 17 00:00:00 2001
From: Jakob Bornecrantz
Date: Tue, 9 Feb 2010 21:29:47 +0000
Subject: drm/vmwgfx: Drop scanout flag compat and add execbuf ioctl parameter members.

Bumps major.  Even if this bumps the version to 1, it does not mean the
driver is out of staging.  From what we know this is the last backwards
incompatible change to the driver.
Signed-off-by: Jakob Bornecrantz
Signed-off-by: Thomas Hellstrom
Signed-off-by: Dave Airlie
---
 include/drm/vmwgfx_drm.h | 8 ++++++++
 1 file changed, 8 insertions(+)
(limited to 'include')

diff --git a/include/drm/vmwgfx_drm.h b/include/drm/vmwgfx_drm.h
index dfaf3c2d2c8..c7645f480d1 100644
--- a/include/drm/vmwgfx_drm.h
+++ b/include/drm/vmwgfx_drm.h
@@ -303,15 +303,23 @@ union drm_vmw_surface_reference_arg {
 * to the nearest kernel tick.
 * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
 * uint64_t.
+ * @version: Allows expanding the execbuf ioctl parameters without breaking
+ * backwards compatibility, since user-space will always tell the kernel
+ * which version it uses.
+ * @flags: Execbuf flags. None currently.
 *
 * Argument to the DRM_VMW_EXECBUF Ioctl.
 */
 
+#define DRM_VMW_EXECBUF_VERSION 0
+
 struct drm_vmw_execbuf_arg {
	uint64_t commands;
	uint32_t command_size;
	uint32_t throttle_us;
	uint64_t fence_rep;
+	uint32_t version;
+	uint32_t flags;
 };
 
 /**
--
cgit v1.2.3
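A hedged user-space sketch showing how the grown argument might be filled
in after this change; the helper, the zero throttle value and the
drmCommandWrite() call are illustrative assumptions, not part of the patch:

    #include <stdint.h>
    #include <string.h>
    #include <xf86drm.h>
    #include "vmwgfx_drm.h"

    static int example_submit(int fd, void *cmds, uint32_t size,
                              struct drm_vmw_fence_rep *fence)
    {
            struct drm_vmw_execbuf_arg arg;

            memset(&arg, 0, sizeof(arg));
            arg.commands     = (uintptr_t)cmds;
            arg.command_size = size;
            arg.throttle_us  = 0;                        /* no throttling */
            arg.fence_rep    = (uintptr_t)fence;
            arg.version      = DRM_VMW_EXECBUF_VERSION;  /* lets the ABI grow later */
            arg.flags        = 0;                        /* none defined yet */

            return drmCommandWrite(fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
    }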