diff options
author | Gary S. Robertson <gary.robertson@linaro.org> | 2015-05-26 11:07:03 -0500 |
---|---|---|
committer | Gary S. Robertson <gary.robertson@linaro.org> | 2015-05-26 11:07:03 -0500 |
commit | 8e493d88e9454760abe679b872da4509ff34d60f (patch) | |
tree | a050572ccdd7767ebce453c98d66ebb94668770f | |
parent | fe0d143682058b43650a5430645aa99fd76b6293 (diff) | |
parent | 131b3aa7c04250554e4d303bc90f798bf21423dd (diff) |
Merge tag 'lsk-v3.14-15.04-rt' of http://git-geo.linaro.org/kernel/linux-linaro-stable into linux-linaro-lng-v3.14-rt
260 files changed, 7099 insertions, 1489 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 5b0c083d7c0e..821c8f4d359f 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -47,6 +47,8 @@ prototypes: int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); @@ -65,6 +67,7 @@ prototypes: struct file *, unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); locking rules: all may block @@ -78,6 +81,7 @@ mkdir: yes unlink: yes (both) rmdir: yes (both) (see below) rename: yes (all) (see below) +rename2: yes (all) (see below) readlink: no follow_link: no put_link: no @@ -93,10 +97,12 @@ fiemap: no update_time: no atomic_open: yes tmpfile: no +dentry_open: no Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. - cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem. + cross-directory ->rename() and rename2() has (per-superblock) +->s_vfs_rename_sem. See Documentation/filesystems/directory-locking for more detailed discussion of the locking scheme for directory operations. diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt new file mode 100644 index 000000000000..6db0e5d1da07 --- /dev/null +++ b/Documentation/filesystems/overlayfs.txt @@ -0,0 +1,226 @@ +Written by: Neil Brown <neilb@suse.de> + +Overlay Filesystem +================== + +This document describes a prototype for a new approach to providing +overlay-filesystem functionality in Linux (sometimes referred to as +union-filesystems). An overlay-filesystem tries to present a +filesystem which is the result over overlaying one filesystem on top +of the other. + +The result will inevitably fail to look exactly like a normal +filesystem for various technical reasons. The expectation is that +many use cases will be able to ignore these differences. + +This approach is 'hybrid' because the objects that appear in the +filesystem do not all appear to belong to that filesystem. In many +cases an object accessed in the union will be indistinguishable +from accessing the corresponding object from the original filesystem. +This is most obvious from the 'st_dev' field returned by stat(2). + +While directories will report an st_dev from the overlay-filesystem, +all non-directory objects will report an st_dev from the lower or +upper filesystem that is providing the object. Similarly st_ino will +only be unique when combined with st_dev, and both of these can change +over the lifetime of a non-directory object. Many applications and +tools ignore these values and will not be affected. + +Upper and Lower +--------------- + +An overlay filesystem combines two filesystems - an 'upper' filesystem +and a 'lower' filesystem. When a name exists in both filesystems, the +object in the 'upper' filesystem is visible while the object in the +'lower' filesystem is either hidden or, in the case of directories, +merged with the 'upper' object. + +It would be more correct to refer to an upper and lower 'directory +tree' rather than 'filesystem' as it is quite possible for both +directory trees to be in the same filesystem and there is no +requirement that the root of a filesystem be given for either upper or +lower. + +The lower filesystem can be any filesystem supported by Linux and does +not need to be writable. The lower filesystem can even be another +overlayfs. The upper filesystem will normally be writable and if it +is it must support the creation of trusted.* extended attributes, and +must provide valid d_type in readdir responses, so NFS is not suitable. + +A read-only overlay of two read-only filesystems may use any +filesystem type. + +Directories +----------- + +Overlaying mainly involves directories. If a given name appears in both +upper and lower filesystems and refers to a non-directory in either, +then the lower object is hidden - the name refers only to the upper +object. + +Where both upper and lower objects are directories, a merged directory +is formed. + +At mount time, the two directories given as mount options "lowerdir" and +"upperdir" are combined into a merged directory: + + mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\ +workdir=/work /merged + +The "workdir" needs to be an empty directory on the same filesystem +as upperdir. + +Then whenever a lookup is requested in such a merged directory, the +lookup is performed in each actual directory and the combined result +is cached in the dentry belonging to the overlay filesystem. If both +actual lookups find directories, both are stored and a merged +directory is created, otherwise only one is stored: the upper if it +exists, else the lower. + +Only the lists of names from directories are merged. Other content +such as metadata and extended attributes are reported for the upper +directory only. These attributes of the lower directory are hidden. + +whiteouts and opaque directories +-------------------------------- + +In order to support rm and rmdir without changing the lower +filesystem, an overlay filesystem needs to record in the upper filesystem +that files have been removed. This is done using whiteouts and opaque +directories (non-directories are always opaque). + +A whiteout is created as a character device with 0/0 device number. +When a whiteout is found in the upper level of a merged directory, any +matching name in the lower level is ignored, and the whiteout itself +is also hidden. + +A directory is made opaque by setting the xattr "trusted.overlay.opaque" +to "y". Where the upper filesystem contains an opaque directory, any +directory in the lower filesystem with the same name is ignored. + +readdir +------- + +When a 'readdir' request is made on a merged directory, the upper and +lower directories are each read and the name lists merged in the +obvious way (upper is read first, then lower - entries that already +exist are not re-added). This merged name list is cached in the +'struct file' and so remains as long as the file is kept open. If the +directory is opened and read by two processes at the same time, they +will each have separate caches. A seekdir to the start of the +directory (offset 0) followed by a readdir will cause the cache to be +discarded and rebuilt. + +This means that changes to the merged directory do not appear while a +directory is being read. This is unlikely to be noticed by many +programs. + +seek offsets are assigned sequentially when the directories are read. +Thus if + - read part of a directory + - remember an offset, and close the directory + - re-open the directory some time later + - seek to the remembered offset + +there may be little correlation between the old and new locations in +the list of filenames, particularly if anything has changed in the +directory. + +Readdir on directories that are not merged is simply handled by the +underlying directory (upper or lower). + + +Non-directories +--------------- + +Objects that are not directories (files, symlinks, device-special +files etc.) are presented either from the upper or lower filesystem as +appropriate. When a file in the lower filesystem is accessed in a way +the requires write-access, such as opening for write access, changing +some metadata etc., the file is first copied from the lower filesystem +to the upper filesystem (copy_up). Note that creating a hard-link +also requires copy_up, though of course creation of a symlink does +not. + +The copy_up may turn out to be unnecessary, for example if the file is +opened for read-write but the data is not modified. + +The copy_up process first makes sure that the containing directory +exists in the upper filesystem - creating it and any parents as +necessary. It then creates the object with the same metadata (owner, +mode, mtime, symlink-target etc.) and then if the object is a file, the +data is copied from the lower to the upper filesystem. Finally any +extended attributes are copied up. + +Once the copy_up is complete, the overlay filesystem simply +provides direct access to the newly created file in the upper +filesystem - future operations on the file are barely noticed by the +overlay filesystem (though an operation on the name of the file such as +rename or unlink will of course be noticed and handled). + + +Multiple lower layers +--------------------- + +Multiple lower layers can now be given using the the colon (":") as a +separator character between the directory names. For example: + + mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged + +As the example shows, "upperdir=" and "workdir=" may be omitted. In +that case the overlay will be read-only. + +The specified lower directories will be stacked beginning from the +rightmost one and going left. In the above example lower1 will be the +top, lower2 the middle and lower3 the bottom layer. + + +Non-standard behavior +--------------------- + +The copy_up operation essentially creates a new, identical file and +moves it over to the old name. The new file may be on a different +filesystem, so both st_dev and st_ino of the file may change. + +Any open files referring to this inode will access the old data and +metadata. Similarly any file locks obtained before copy_up will not +apply to the copied up file. + +On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and +fsetxattr(2) will fail with EROFS. + +If a file with multiple hard links is copied up, then this will +"break" the link. Changes will not be propagated to other names +referring to the same inode. + +Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory +object in overlayfs will not contain valid absolute paths, only +relative paths leading up to the filesystem's root. This will be +fixed in the future. + +Some operations are not atomic, for example a crash during copy_up or +rename will leave the filesystem in an inconsistent state. This will +be addressed in the future. + +Changes to underlying filesystems +--------------------------------- + +Offline changes, when the overlay is not mounted, are allowed to either +the upper or the lower trees. + +Changes to the underlying filesystems while part of a mounted overlay +filesystem are not allowed. If the underlying filesystem is changed, +the behavior of the overlay is undefined, though it will not result in +a crash or deadlock. + +Testsuite +--------- + +There's testsuite developed by David Howells at: + + git://git.infradead.org/users/dhowells/unionmount-testsuite.git + +Run as root: + + # cd unionmount-testsuite + # ./run --ov diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index c53784c119c8..5d06f2a19798 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -347,6 +347,8 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*readlink) (struct dentry *, char __user *,int); void * (*follow_link) (struct dentry *, struct nameidata *); void (*put_link) (struct dentry *, struct nameidata *, void *); @@ -362,6 +364,7 @@ struct inode_operations { int (*atomic_open)(struct inode *, struct dentry *, struct file *, unsigned open_flag, umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); }; Again, all methods are called without any locks being held, unless @@ -414,6 +417,20 @@ otherwise noted. rename: called by the rename(2) system call to rename the object to have the parent and name given by the second inode and dentry. + rename2: this has an additional flags argument compared to rename. + If no flags are supported by the filesystem then this method + need not be implemented. If some flags are supported then the + filesystem must return -EINVAL for any unsupported or unknown + flags. Currently the following flags are implemented: + (1) RENAME_NOREPLACE: this flag indicates that if the target + of the rename exists the rename should fail with -EEXIST + instead of replacing the target. The VFS already checks for + existence, so for local filesystems the RENAME_NOREPLACE + implementation is equivalent to plain rename. + (2) RENAME_EXCHANGE: exchange source and target. Both must + exist; this is checked by the VFS. Unlike plain rename, + source and target may be of different type. + readlink: called by the readlink(2) system call. Only required if you want to support reading symbolic links @@ -681,6 +698,12 @@ struct address_space_operations { but instead uses bmap to find out where the blocks in the file are and uses those addresses directly. + dentry_open: *WARNING: probably going away soon, do not use!* This is an + alternative to f_op->open(), the difference is that this method may open + a file not necessarily originating from the same filesystem as the one + i_op->open() was called on. It may be useful for stacking filesystems + which want to allow native I/O directly on underlying files. + invalidatepage: If a page has PagePrivate set, then invalidatepage will be called when part or all of the page is to be removed diff --git a/MAINTAINERS b/MAINTAINERS index c701f48b8e00..cad9448ea92d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6468,6 +6468,13 @@ F: drivers/scsi/osd/ F: include/scsi/osd_* F: fs/exofs/ +OVERLAY FILESYSTEM +M: Miklos Szeredi <miklos@szeredi.hu> +L: linux-fsdevel@vger.kernel.org +S: Supported +F: fs/overlayfs/* +F: Documentation/filesystems/overlayfs.txt + P54 WIRELESS DRIVER M: Christian Lamparter <chunkeey@googlemail.com> L: linux-wireless@vger.kernel.org @@ -1,6 +1,6 @@ VERSION = 3 PATCHLEVEL = 14 -SUBLEVEL = 36 +SUBLEVEL = 39 EXTRAVERSION = NAME = Remembering Coco diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 1402fcc11c2c..98f57d34923a 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -104,11 +104,12 @@ struct osf_dirent_callback { }; static int -osf_filldir(void *__buf, const char *name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +osf_filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { struct osf_dirent __user *dirent; - struct osf_dirent_callback *buf = (struct osf_dirent_callback *) __buf; + struct osf_dirent_callback *buf = + container_of(ctx, struct osf_dirent_callback, ctx); unsigned int reclen = ALIGN(NAME_OFFSET + namlen + 1, sizeof(u32)); unsigned int d_ino; diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index 7e95e1a86510..d68b410595c8 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int err; - err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs, + err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs, sizeof(sf->uc.uc_mcontext.regs.scratch)); err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t)); @@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf) if (!err) set_current_blocked(&set); - err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs), + err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch), sizeof(sf->uc.uc_mcontext.regs.scratch)); return err; diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi index e96da9a898ad..f2512e1d28c7 100644 --- a/arch/arm/boot/dts/dra7xx-clocks.dtsi +++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi @@ -243,10 +243,18 @@ ti,invert-autoidle-bit; }; + dpll_core_byp_mux: dpll_core_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x012c>; + }; + dpll_core_ck: dpll_core_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-core-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_core_byp_mux>; reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>; }; @@ -309,10 +317,18 @@ clock-div = <1>; }; + dpll_dsp_byp_mux: dpll_dsp_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x0240>; + }; + dpll_dsp_ck: dpll_dsp_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_dsp_byp_mux>; reg = <0x0234>, <0x0238>, <0x0240>, <0x023c>; }; @@ -335,10 +351,18 @@ clock-div = <1>; }; + dpll_iva_byp_mux: dpll_iva_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x01ac>; + }; + dpll_iva_ck: dpll_iva_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_iva_byp_mux>; reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>; }; @@ -361,10 +385,18 @@ clock-div = <1>; }; + dpll_gpu_byp_mux: dpll_gpu_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x02e4>; + }; + dpll_gpu_ck: dpll_gpu_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_gpu_byp_mux>; reg = <0x02d8>, <0x02dc>, <0x02e4>, <0x02e0>; }; @@ -398,10 +430,18 @@ clock-div = <1>; }; + dpll_ddr_byp_mux: dpll_ddr_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x021c>; + }; + dpll_ddr_ck: dpll_ddr_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_ddr_byp_mux>; reg = <0x0210>, <0x0214>, <0x021c>, <0x0218>; }; @@ -416,10 +456,18 @@ ti,invert-autoidle-bit; }; + dpll_gmac_byp_mux: dpll_gmac_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + ti,bit-shift = <23>; + reg = <0x02b4>; + }; + dpll_gmac_ck: dpll_gmac_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>; + clocks = <&sys_clkin1>, <&dpll_gmac_byp_mux>; reg = <0x02a8>, <0x02ac>, <0x02b4>, <0x02b0>; }; @@ -482,10 +530,18 @@ clock-div = <1>; }; + dpll_eve_byp_mux: dpll_eve_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x0290>; + }; + dpll_eve_ck: dpll_eve_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_eve_byp_mux>; reg = <0x0284>, <0x0288>, <0x0290>, <0x028c>; }; @@ -1214,10 +1270,18 @@ clock-div = <1>; }; + dpll_per_byp_mux: dpll_per_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x014c>; + }; + dpll_per_ck: dpll_per_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-clock"; - clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_per_byp_mux>; reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>; }; @@ -1240,10 +1304,18 @@ clock-div = <1>; }; + dpll_usb_byp_mux: dpll_usb_byp_mux { + #clock-cells = <0>; + compatible = "ti,mux-clock"; + clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>; + ti,bit-shift = <23>; + reg = <0x018c>; + }; + dpll_usb_ck: dpll_usb_ck { #clock-cells = <0>; compatible = "ti,omap4-dpll-j-type-clock"; - clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>; + clocks = <&sys_clkin1>, <&dpll_usb_byp_mux>; reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>; }; diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped index 71e5fc7cfb18..1d1800f71c5b 100644 --- a/arch/arm/crypto/aesbs-core.S_shipped +++ b/arch/arm/crypto/aesbs-core.S_shipped @@ -58,14 +58,18 @@ # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + .text .syntax unified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -74,8 +78,6 @@ .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2095,9 +2097,11 @@ bsaes_xts_decrypt: vld1.8 {q8}, [r0] @ initial tweak adr r2, .Lxts_magic +#ifndef XTS_CHAIN_TWEAK tst r9, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne r9, #0x10 @ subtract another 16 bytes +#endif subs r9, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl index be068db960ee..a4d3856e7d24 100644 --- a/arch/arm/crypto/bsaes-armv7.pl +++ b/arch/arm/crypto/bsaes-armv7.pl @@ -701,14 +701,18 @@ $code.=<<___; # define VFP_ABI_FRAME 0 # define BSAES_ASM_EXTENDED_KEY # define XTS_CHAIN_TWEAK -# define __ARM_ARCH__ 7 +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 #endif #ifdef __thumb__ # define adrl adr #endif -#if __ARM_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 +.arch armv7-a +.fpu neon + .text .syntax unified @ ARMv7-capable assembler is expected to handle this #ifdef __thumb2__ @@ -717,8 +721,6 @@ $code.=<<___; .code 32 #endif -.fpu neon - .type _bsaes_decrypt8,%function .align 4 _bsaes_decrypt8: @@ -2076,9 +2078,11 @@ bsaes_xts_decrypt: vld1.8 {@XMM[8]}, [r0] @ initial tweak adr $magic, .Lxts_magic +#ifndef XTS_CHAIN_TWEAK tst $len, #0xf @ if not multiple of 16 it ne @ Thumb2 thing, sanity check in ARM subne $len, #0x10 @ subtract another 16 bytes +#endif subs $len, #0x80 blo .Lxts_dec_short diff --git a/arch/arm/include/asm/cmpxchg.h b/arch/arm/include/asm/cmpxchg.h index df2fbba7efc8..30b1a4010b89 100644 --- a/arch/arm/include/asm/cmpxchg.h +++ b/arch/arm/include/asm/cmpxchg.h @@ -127,6 +127,8 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size #else /* min ARCH >= ARMv6 */ +#define __HAVE_ARCH_CMPXCHG 1 + extern void __bad_cmpxchg(volatile void *ptr, int size); /* diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 2aff798fbef4..54d26a98ff84 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -90,6 +90,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; + preempt_disable_rt(); + __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -101,6 +103,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "cc", "memory"); *uval = val; + + preempt_enable_rt(); return ret; } diff --git a/arch/arm/mach-at91/pm.h b/arch/arm/mach-at91/pm.h index c5101dcb4fb0..1d4df3b70ebc 100644 --- a/arch/arm/mach-at91/pm.h +++ b/arch/arm/mach-at91/pm.h @@ -45,7 +45,7 @@ static inline void at91rm9200_standby(void) " mcr p15, 0, %0, c7, c0, 4\n\t" " str %5, [%1, %2]" : - : "r" (0), "r" (AT91_BASE_SYS), "r" (AT91RM9200_SDRAMC_LPR), + : "r" (0), "r" (at91_ramc_base[0]), "r" (AT91RM9200_SDRAMC_LPR), "r" (1), "r" (AT91RM9200_SDRAMC_SRR), "r" (lpr)); } diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 77a7fe6cbd4a..12903cb4b40c 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -431,6 +431,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, if (addr < TASK_SIZE) return do_page_fault(addr, fsr, regs); + if (interrupts_enabled(regs)) + local_irq_enable(); + if (user_mode(regs)) goto bad_area; @@ -507,6 +510,9 @@ do_translation_fault(unsigned long addr, unsigned int fsr, static int do_sect_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { + if (interrupts_enabled(regs)) + local_irq_enable(); + do_bad_area(addr, fsr, regs); return 0; } diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index a9eee33dfa62..101a42bde728 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + /* + * init_mm.pgd does not contain any user mappings and it is always + * active for kernel addresses in TTBR1. Just set the reserved TTBR0. + */ + if (next == &init_mm) { + cpu_set_reserved_ttbr0(); + return; + } + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) check_and_switch_context(next, tsk); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 90c4c129f71a..dff33006322b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -310,25 +310,6 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) } } -/* - * Limit the memory size that was specified via FDT. - */ -static int __init early_mem(char *p) -{ - phys_addr_t limit; - - if (!p) - return 1; - - limit = memparse(p, &p) & PAGE_MASK; - pr_notice("Memory limited to %lldMB\n", limit >> 20); - - memblock_enforce_memory_limit(limit); - - return 0; -} -early_param("mem", early_mem); - static void __init request_standard_resources(void) { struct memblock_region *region; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 4164c5ace9f8..de3abbe6c59f 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -55,6 +55,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, flags |= GFP_DMA; if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; + void *addr; size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, @@ -63,7 +64,10 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, return NULL; *dma_handle = phys_to_dma(dev, page_to_phys(page)); - return page_address(page); + addr = page_address(page); + if (flags & __GFP_ZERO) + memset(addr, 0, size); + return addr; } else { return swiotlb_alloc_coherent(dev, size, dma_handle, flags); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index cc3339d0a276..2d8f884d7f92 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -124,10 +124,29 @@ static void arm64_memory_present(void) } #endif +static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX; + +/* + * Limit the memory size that was specified via FDT. + */ +static int __init early_mem(char *p) +{ + if (!p) + return 1; + + memory_limit = memparse(p, &p) & PAGE_MASK; + pr_notice("Memory limited to %lldMB\n", memory_limit >> 20); + + return 0; +} +early_param("mem", early_mem); + void __init arm64_memblock_init(void) { u64 *reserve_map, base, size; + memblock_enforce_memory_limit(memory_limit); + /* * Register the kernel text, kernel data, initrd, and initial * pagetables with memblock. diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 4be046bfc673..334d3559bb4d 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -125,7 +125,7 @@ void *kmap_coherent(struct page *page, unsigned long addr) BUG_ON(Page_dcache_dirty(page)); - pagefault_disable(); + raw_pagefault_disable(); idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); #ifdef CONFIG_MIPS_MT_SMTC idx += FIX_N_COLOURS * smp_processor_id() + @@ -192,7 +192,7 @@ void kunmap_coherent(void) write_c0_entryhi(old_ctx); EXIT_CRITICAL(flags); #endif - pagefault_enable(); + raw_pagefault_enable(); } void copy_user_highpage(struct page *to, struct page *from, diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 2bedafea3d94..97a7bf8df348 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -56,11 +56,12 @@ struct getdents_callback { #define NAME_OFFSET(de) ((int) ((de)->d_name - (char __user *) (de))) -static int filldir(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned d_type) +static int filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned d_type) { struct hpux_dirent __user * dirent; - struct getdents_callback * buf = (struct getdents_callback *) __buf; + struct getdents_callback *buf = + container_of(ctx, struct getdents_callback, ctx); ino_t d_ino; int reclen = ALIGN(NAME_OFFSET(dirent) + namlen + 1, sizeof(long)); diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi index 1382fec9e8c5..7fcb1ac0f232 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-0.dtsi @@ -50,6 +50,7 @@ ethernet@b0000 { fsl,num_tx_queues = <0x8>; fsl,magic-packet; local-mac-address = [ 00 00 00 00 00 00 ]; + ranges; queue-group@b0000 { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi index 221cd2ea5b31..9f25427c1527 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-1.dtsi @@ -50,6 +50,7 @@ ethernet@b1000 { fsl,num_tx_queues = <0x8>; fsl,magic-packet; local-mac-address = [ 00 00 00 00 00 00 ]; + ranges; queue-group@b1000 { #address-cells = <1>; diff --git a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi index 61456c317609..cd7c318ab131 100644 --- a/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi +++ b/arch/powerpc/boot/dts/fsl/pq3-etsec2-2.dtsi @@ -49,6 +49,7 @@ ethernet@b2000 { fsl,num_tx_queues = <0x8>; fsl,magic-packet; local-mac-address = [ 00 00 00 00 00 00 ]; + ranges; queue-group@b2000 { #address-cells = <1>; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 38d507306a11..5193116eadc0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1422,7 +1422,7 @@ machine_check_handle_early: bne 9f /* continue in V mode if we are. */ 5: -#ifdef CONFIG_KVM_BOOK3S_64_HV +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. We will fall through diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index cde4e0a095ae..bf3829242aff 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -24,10 +24,10 @@ static struct kobject *mobility_kobj; struct update_props_workarea { - u32 phandle; - u32 state; - u64 reserved; - u32 nprops; + __be32 phandle; + __be32 state; + __be64 reserved; + __be32 nprops; } __packed; #define NODE_ACTION_MASK 0xff000000 @@ -53,11 +53,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope) return rc; } -static int delete_dt_node(u32 phandle) +static int delete_dt_node(__be32 phandle) { struct device_node *dn; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) return -ENOENT; @@ -126,7 +126,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop, return 0; } -static int update_dt_node(u32 phandle, s32 scope) +static int update_dt_node(__be32 phandle, s32 scope) { struct update_props_workarea *upwa; struct device_node *dn; @@ -135,6 +135,7 @@ static int update_dt_node(u32 phandle, s32 scope) char *prop_data; char *rtas_buf; int update_properties_token; + u32 nprops; u32 vd; update_properties_token = rtas_token("ibm,update-properties"); @@ -145,7 +146,7 @@ static int update_dt_node(u32 phandle, s32 scope) if (!rtas_buf) return -ENOMEM; - dn = of_find_node_by_phandle(phandle); + dn = of_find_node_by_phandle(be32_to_cpu(phandle)); if (!dn) { kfree(rtas_buf); return -ENOENT; @@ -161,6 +162,7 @@ static int update_dt_node(u32 phandle, s32 scope) break; prop_data = rtas_buf + sizeof(*upwa); + nprops = be32_to_cpu(upwa->nprops); /* On the first call to ibm,update-properties for a node the * the first property value descriptor contains an empty @@ -169,17 +171,17 @@ static int update_dt_node(u32 phandle, s32 scope) */ if (*prop_data == 0) { prop_data++; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += vd + sizeof(vd); - upwa->nprops--; + nprops--; } - for (i = 0; i < upwa->nprops; i++) { + for (i = 0; i < nprops; i++) { char *prop_name; prop_name = prop_data; prop_data += strlen(prop_name) + 1; - vd = *(u32 *)prop_data; + vd = be32_to_cpu(*(__be32 *)prop_data); prop_data += sizeof(vd); switch (vd) { @@ -211,13 +213,13 @@ static int update_dt_node(u32 phandle, s32 scope) return 0; } -static int add_dt_node(u32 parent_phandle, u32 drc_index) +static int add_dt_node(__be32 parent_phandle, __be32 drc_index) { struct device_node *dn; struct device_node *parent_dn; int rc; - parent_dn = of_find_node_by_phandle(parent_phandle); + parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle)); if (!parent_dn) return -ENOENT; @@ -236,7 +238,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index) int pseries_devicetree_update(s32 scope) { char *rtas_buf; - u32 *data; + __be32 *data; int update_nodes_token; int rc; @@ -253,17 +255,17 @@ int pseries_devicetree_update(s32 scope) if (rc && rc != 1) break; - data = (u32 *)rtas_buf + 4; - while (*data & NODE_ACTION_MASK) { + data = (__be32 *)rtas_buf + 4; + while (be32_to_cpu(*data) & NODE_ACTION_MASK) { int i; - u32 action = *data & NODE_ACTION_MASK; - int node_count = *data & NODE_COUNT_MASK; + u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK; + u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK; data++; for (i = 0; i < node_count; i++) { - u32 phandle = *data++; - u32 drc_index; + __be32 phandle = *data++; + __be32 drc_index; switch (action) { case DELETE_DT_NODE: diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 617b9fe33771..3ccb6777a7e1 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -960,6 +960,8 @@ out: cpuc->pcr[0] |= cpuc->event[0]->hw.config_base; } +static void sparc_pmu_start(struct perf_event *event, int flags); + /* On this PMU each PIC has it's own PCR control register. */ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) { @@ -972,20 +974,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc) struct perf_event *cp = cpuc->event[i]; struct hw_perf_event *hwc = &cp->hw; int idx = hwc->idx; - u64 enc; if (cpuc->current_idx[i] != PIC_NO_INDEX) continue; - sparc_perf_event_set_period(cp, hwc, idx); cpuc->current_idx[i] = idx; - enc = perf_event_get_enc(cpuc->events[i]); - cpuc->pcr[idx] &= ~mask_for_index(idx); - if (hwc->state & PERF_HES_STOPPED) - cpuc->pcr[idx] |= nop_for_index(idx); - else - cpuc->pcr[idx] |= event_encoding(enc, idx); + sparc_pmu_start(cp, PERF_EF_RELOAD); } out: for (i = 0; i < cpuc->n_events; i++) { @@ -1101,7 +1096,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) int i; local_irq_save(flags); - perf_pmu_disable(event->pmu); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event[i]) { @@ -1127,7 +1121,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags) } } - perf_pmu_enable(event->pmu); local_irq_restore(flags); } @@ -1361,7 +1354,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags) unsigned long flags; local_irq_save(flags); - perf_pmu_disable(event->pmu); n0 = cpuc->n_events; if (n0 >= sparc_pmu->max_hw_events) @@ -1394,7 +1386,6 @@ nocheck: ret = 0; out: - perf_pmu_enable(event->pmu); local_irq_restore(flags); return ret; } diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index c6f7113b6e2f..1a79d6877981 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -281,6 +281,8 @@ void arch_trigger_all_cpu_backtrace(void) printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n", gp->tpc, gp->o7, gp->i7, gp->rpc); } + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); @@ -356,6 +358,8 @@ static void pmu_snapshot_all_cpus(void) (cpu == this_cpu ? '*' : ' '), cpu, pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3], pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]); + + touch_nmi_watchdog(); } memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index beb0b5a5f21f..25db14a33d03 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -332,7 +332,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second long err; /* No need for backward compatibility. We can start fresh... */ - if (call <= SEMCTL) { + if (call <= SEMTIMEDOP) { switch (call) { case SEMOP: err = sys_semtimedop(first, ptr, diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S index b7f6334e159f..857ad4f8905f 100644 --- a/arch/sparc/lib/memmove.S +++ b/arch/sparc/lib/memmove.S @@ -8,9 +8,11 @@ .text ENTRY(memmove) /* o0=dst o1=src o2=len */ - mov %o0, %g1 + brz,pn %o2, 99f + mov %o0, %g1 + cmp %o0, %o1 - bleu,pt %xcc, memcpy + bleu,pt %xcc, 2f add %o1, %o2, %g7 cmp %g7, %o0 bleu,pt %xcc, memcpy @@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */ stb %g7, [%o0] bne,pt %icc, 1b sub %o0, 1, %o0 - +99: retl mov %g1, %o0 + + /* We can't just call memcpy for these memmove cases. On some + * chips the memcpy uses cache initializing stores and when dst + * and src are close enough, those can clobber the source data + * before we've loaded it in. + */ +2: or %o0, %o1, %g7 + or %o2, %g7, %g7 + andcc %g7, 0x7, %g0 + bne,pn %xcc, 4f + nop + +3: ldx [%o1], %g7 + add %o1, 8, %o1 + subcc %o2, 8, %o2 + add %o0, 8, %o0 + bne,pt %icc, 3b + stx %g7, [%o0 - 0x8] + ba,a,pt %xcc, 99b + +4: ldub [%o1], %g7 + add %o1, 1, %o1 + subcc %o2, 1, %o2 + add %o0, 1, %o0 + bne,pt %icc, 4b + stb %g7, [%o0 - 0x1] + ba,a,pt %xcc, 99b ENDPROC(memmove) diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c index cfbe53c17b0d..09daebdee552 100644 --- a/arch/sparc/mm/srmmu.c +++ b/arch/sparc/mm/srmmu.c @@ -460,10 +460,12 @@ static void __init sparc_context_init(int numctx) void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk) { + unsigned long flags; + if (mm->context == NO_CONTEXT) { - spin_lock(&srmmu_context_spinlock); + spin_lock_irqsave(&srmmu_context_spinlock, flags); alloc_context(old_mm, mm); - spin_unlock(&srmmu_context_spinlock); + spin_unlock_irqrestore(&srmmu_context_spinlock, flags); srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd); } @@ -988,14 +990,15 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) void destroy_context(struct mm_struct *mm) { + unsigned long flags; if (mm->context != NO_CONTEXT) { flush_cache_mm(mm); srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir); flush_tlb_mm(mm); - spin_lock(&srmmu_context_spinlock); + spin_lock_irqsave(&srmmu_context_spinlock, flags); free_context(mm->context); - spin_unlock(&srmmu_context_spinlock); + spin_unlock_irqrestore(&srmmu_context_spinlock, flags); mm->context = NO_CONTEXT; } } diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index df965e5dbb72..694e2ac4d65e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1111,7 +1111,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); if (!src) return -ENOMEM; - assoc = (src + req->cryptlen + auth_tag_len); + assoc = (src + req->cryptlen); scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); scatterwalk_map_and_copy(assoc, req->assoc, 0, req->assoclen, 0); @@ -1136,7 +1136,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) scatterwalk_done(&src_sg_walk, 0, 0); scatterwalk_done(&assoc_sg_walk, 0, 0); } else { - scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); + scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1); kfree(src); } return retval; diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index cea1c76d49bf..1ac1c009090d 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -368,7 +368,7 @@ static inline void drop_fpu(struct task_struct *tsk) preempt_disable(); tsk->thread.fpu_counter = 0; __drop_fpu(tsk); - clear_used_math(); + clear_stopped_child_used_math(tsk); preempt_enable(); } diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 0b46ef261c77..c2c8a7828f04 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -611,9 +611,9 @@ struct bau_control { cycles_t send_message; cycles_t period_end; cycles_t period_time; - spinlock_t uvhub_lock; - spinlock_t queue_lock; - spinlock_t disable_lock; + raw_spinlock_t uvhub_lock; + raw_spinlock_t queue_lock; + raw_spinlock_t disable_lock; /* tunables */ int max_concurr; int max_concurr_const; @@ -773,15 +773,15 @@ static inline int atom_asr(short i, struct atomic_short *v) * to be lowered below the current 'v'. atomic_add_unless can only stop * on equal. */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +static inline int atomic_inc_unless_ge(raw_spinlock_t *lock, atomic_t *v, int u) { - spin_lock(lock); + raw_spin_lock(lock); if (atomic_read(v) >= u) { - spin_unlock(lock); + raw_spin_unlock(lock); return 0; } atomic_inc(v); - spin_unlock(lock); + raw_spin_unlock(lock); return 1; } diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index a30836c8ac4d..f559a092d8fa 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -502,7 +502,7 @@ struct uv_blade_info { unsigned short nr_online_cpus; unsigned short pnode; short memory_nid; - spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ + raw_spinlock_t nmi_lock; /* obsolete, see uv_hub_nmi */ unsigned long nmi_count; /* obsolete, see uv_hub_nmi */ }; extern struct uv_blade_info *uv_blade_info; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d263b1307de1..1ef690e5459f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -911,7 +911,7 @@ void __init uv_system_init(void) uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; - spin_lock_init(&uv_blade_info[blade].nmi_lock); + raw_spin_lock_init(&uv_blade_info[blade].nmi_lock); min_pnode = min(pnode, min_pnode); max_pnode = max(pnode, max_pnode); blade++; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c752cb43e52f..a6aa91f77654 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -181,6 +181,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { }, }, + /* ASRock */ + { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ + .callback = set_pci_reboot, + .ident = "ASRock Q1900DC-ITX", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"), + DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"), + }, + }, + /* ASUS */ { /* Handle problems with rebooting on ASUS P4S800 */ .callback = set_bios_reboot, diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index dd50e26c58f6..7a09aca4b33a 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -375,7 +375,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) * thread's fpu state, reconstruct fxstate from the fsave * header. Sanitize the copied state etc. */ - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; @@ -389,14 +389,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ drop_fpu(tsk); - if (__copy_from_user(xsave, buf_fx, state_size) || + if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { + fpu_finit(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - set_used_math(); } + set_used_math(); if (use_eager_fpu()) { preempt_disable(); math_state_restore(); diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index dfe605ac1bcd..999fd5be9aa1 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -719,9 +719,9 @@ static void destination_plugged(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); + raw_spin_lock(&hmaster->queue_lock); reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); + raw_spin_unlock(&hmaster->queue_lock); end_uvhub_quiesce(hmaster); @@ -741,9 +741,9 @@ static void destination_timeout(struct bau_desc *bau_desc, quiesce_local_uvhub(hmaster); - spin_lock(&hmaster->queue_lock); + raw_spin_lock(&hmaster->queue_lock); reset_with_ipi(&bau_desc->distribution, bcp); - spin_unlock(&hmaster->queue_lock); + raw_spin_unlock(&hmaster->queue_lock); end_uvhub_quiesce(hmaster); @@ -764,7 +764,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) cycles_t tm1; hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); + raw_spin_lock(&hmaster->disable_lock); if (!bcp->baudisabled) { stat->s_bau_disabled++; tm1 = get_cycles(); @@ -777,7 +777,7 @@ static void disable_for_period(struct bau_control *bcp, struct ptc_stats *stat) } } } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); } static void count_max_concurr(int stat, struct bau_control *bcp, @@ -840,7 +840,7 @@ static void record_send_stats(cycles_t time1, cycles_t time2, */ static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) { - spinlock_t *lock = &hmaster->uvhub_lock; + raw_spinlock_t *lock = &hmaster->uvhub_lock; atomic_t *v; v = &hmaster->active_descriptor_count; @@ -972,7 +972,7 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) struct bau_control *hmaster; hmaster = bcp->uvhub_master; - spin_lock(&hmaster->disable_lock); + raw_spin_lock(&hmaster->disable_lock); if (bcp->baudisabled && (get_cycles() >= bcp->set_bau_on_time)) { stat->s_bau_reenabled++; for_each_present_cpu(tcpu) { @@ -984,10 +984,10 @@ static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) tbcp->period_giveups = 0; } } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); return 0; } - spin_unlock(&hmaster->disable_lock); + raw_spin_unlock(&hmaster->disable_lock); return -1; } @@ -1895,9 +1895,9 @@ static void __init init_per_cpu_tunables(void) bcp->cong_reps = congested_reps; bcp->disabled_period = sec_2_cycles(disabled_period); bcp->giveup_limit = giveup_limit; - spin_lock_init(&bcp->queue_lock); - spin_lock_init(&bcp->uvhub_lock); - spin_lock_init(&bcp->disable_lock); + raw_spin_lock_init(&bcp->queue_lock); + raw_spin_lock_init(&bcp->uvhub_lock); + raw_spin_lock_init(&bcp->disable_lock); } } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index f90444f2fc7f..8ca505846a0b 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -58,7 +58,7 @@ static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); /* There is one of these allocated per node */ struct uv_rtc_timer_head { - spinlock_t lock; + raw_spinlock_t lock; /* next cpu waiting for timer, local node relative: */ int next_cpu; /* number of cpus on this node: */ @@ -178,7 +178,7 @@ static __init int uv_rtc_allocate_timers(void) uv_rtc_deallocate_timers(); return -ENOMEM; } - spin_lock_init(&head->lock); + raw_spin_lock_init(&head->lock); head->ncpus = uv_blade_nr_possible_cpus(bid); head->next_cpu = -1; blade_info[bid] = head; @@ -232,7 +232,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) unsigned long flags; int next_cpu; - spin_lock_irqsave(&head->lock, flags); + raw_spin_lock_irqsave(&head->lock, flags); next_cpu = head->next_cpu; *t = expires; @@ -244,12 +244,12 @@ static int uv_rtc_set_timer(int cpu, u64 expires) if (uv_setup_intr(cpu, expires)) { *t = ULLONG_MAX; uv_rtc_find_next_timer(head, pnode); - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return -ETIME; } } - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return 0; } @@ -268,7 +268,7 @@ static int uv_rtc_unset_timer(int cpu, int force) unsigned long flags; int rc = 0; - spin_lock_irqsave(&head->lock, flags); + raw_spin_lock_irqsave(&head->lock, flags); if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) rc = 1; @@ -280,7 +280,7 @@ static int uv_rtc_unset_timer(int cpu, int force) uv_rtc_find_next_timer(head, pnode); } - spin_unlock_irqrestore(&head->lock, flags); + raw_spin_unlock_irqrestore(&head->lock, flags); return rc; } @@ -300,13 +300,18 @@ static int uv_rtc_unset_timer(int cpu, int force) static cycle_t uv_read_rtc(struct clocksource *cs) { unsigned long offset; + cycle_t cycles; + preempt_disable(); if (uv_get_min_hub_revision_id() == 1) offset = 0; else offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; - return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + cycles = (cycle_t)uv_read_local_mmr(UVH_RTC | offset); + preempt_enable(); + + return cycles; } /* diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 7f517ca19d32..ec255a1646d2 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -322,6 +322,7 @@ 313 common finit_module sys_finit_module 314 common sched_setattr sys_sched_setattr 315 common sched_getattr sys_sched_getattr +316 common renameat2 sys_renameat2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S index 31776d0efc8c..d7ec4e251c0a 100644 --- a/arch/x86/vdso/vdso32/sigreturn.S +++ b/arch/x86/vdso/vdso32/sigreturn.S @@ -17,6 +17,7 @@ .text .globl __kernel_sigreturn .type __kernel_sigreturn,@function + nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */ ALIGN __kernel_sigreturn: .LSTART_sigreturn: diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 17f9ec501972..fd8496a92b45 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -962,7 +962,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr) return -EINVAL; drv->safe_state_index = -1; - for (i = 0; i < CPUIDLE_STATE_MAX; i++) { + for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) { drv->states[i].name[0] = '\0'; drv->states[i].desc[0] = '\0'; } diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index 930cad4e5df8..2b946bc4212d 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -313,7 +313,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (pos == 0) { memmove(blk + offset * map->cache_word_size, blk, rbnode->blklen * map->cache_word_size); - bitmap_shift_right(present, present, offset, blklen); + bitmap_shift_left(present, present, offset, blklen); } /* update the rbnode block, its size and the base register */ diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 55298db36b2d..d18093681af2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -814,10 +814,6 @@ static int __init nbd_init(void) return -EINVAL; } - nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); - if (!nbd_dev) - return -ENOMEM; - part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -839,6 +835,10 @@ static int __init nbd_init(void) if (nbds_max > 1UL << (MINORBITS - part_shift)) return -EINVAL; + nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL); + if (!nbd_dev) + return -ENOMEM; + for (i = 0; i < nbds_max; i++) { struct gendisk *disk = alloc_disk(1 << part_shift); if (!disk) diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c index eff9d5870034..102463ba745d 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.c +++ b/drivers/char/tpm/tpm_ibmvtpm.c @@ -124,7 +124,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) { struct ibmvtpm_dev *ibmvtpm; struct ibmvtpm_crq crq; - u64 *word = (u64 *) &crq; + __be64 *word = (__be64 *)&crq; int rc; ibmvtpm = (struct ibmvtpm_dev *)TPM_VPRIV(chip); @@ -145,11 +145,11 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count) memcpy((void *)ibmvtpm->rtce_buf, (void *)buf, count); crq.valid = (u8)IBMVTPM_VALID_CMD; crq.msg = (u8)VTPM_TPM_COMMAND; - crq.len = (u16)count; - crq.data = ibmvtpm->rtce_dma_handle; + crq.len = cpu_to_be16(count); + crq.data = cpu_to_be32(ibmvtpm->rtce_dma_handle); - rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(word[0]), - cpu_to_be64(word[1])); + rc = ibmvtpm_send_crq(ibmvtpm->vdev, be64_to_cpu(word[0]), + be64_to_cpu(word[1])); if (rc != H_SUCCESS) { dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc); rc = 0; diff --git a/drivers/char/tpm/tpm_ibmvtpm.h b/drivers/char/tpm/tpm_ibmvtpm.h index bd82a791f995..b2c231b1beec 100644 --- a/drivers/char/tpm/tpm_ibmvtpm.h +++ b/drivers/char/tpm/tpm_ibmvtpm.h @@ -22,9 +22,9 @@ struct ibmvtpm_crq { u8 valid; u8 msg; - u16 len; - u32 data; - u64 reserved; + __be16 len; + __be32 data; + __be64 reserved; } __attribute__((packed, aligned(8))); struct ibmvtpm_crq_queue { diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 6928d094451d..b08eadb4b1d2 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -142,6 +142,7 @@ struct ports_device { * notification */ struct work_struct control_work; + struct work_struct config_work; struct list_head ports; @@ -1832,10 +1833,21 @@ static void config_intr(struct virtio_device *vdev) portdev = vdev->priv; + if (!use_multiport(portdev)) + schedule_work(&portdev->config_work); +} + +static void config_work_handler(struct work_struct *work) +{ + struct ports_device *portdev; + + portdev = container_of(work, struct ports_device, control_work); if (!use_multiport(portdev)) { + struct virtio_device *vdev; struct port *port; u16 rows, cols; + vdev = portdev->vdev; virtio_cread(vdev, struct virtio_console_config, cols, &cols); virtio_cread(vdev, struct virtio_console_config, rows, &rows); @@ -2024,12 +2036,14 @@ static int virtcons_probe(struct virtio_device *vdev) spin_lock_init(&portdev->ports_lock); INIT_LIST_HEAD(&portdev->ports); + INIT_WORK(&portdev->config_work, &config_work_handler); + INIT_WORK(&portdev->control_work, &control_work_handler); + if (multiport) { unsigned int nr_added_bufs; spin_lock_init(&portdev->c_ivq_lock); spin_lock_init(&portdev->c_ovq_lock); - INIT_WORK(&portdev->control_work, &control_work_handler); nr_added_bufs = fill_queue(portdev->c_ivq, &portdev->c_ivq_lock); @@ -2097,6 +2111,8 @@ static void virtcons_remove(struct virtio_device *vdev) /* Finish up work that's lined up */ if (use_multiport(portdev)) cancel_work_sync(&portdev->control_work); + else + cancel_work_sync(&portdev->config_work); list_for_each_entry_safe(port, port2, &portdev->ports, list) unplug_port(port); @@ -2148,6 +2164,7 @@ static int virtcons_freeze(struct virtio_device *vdev) virtqueue_disable_cb(portdev->c_ivq); cancel_work_sync(&portdev->control_work); + cancel_work_sync(&portdev->config_work); /* * Once more: if control_work_handler() was running, it would * enable the cb as the last step. diff --git a/drivers/clocksource/time-efm32.c b/drivers/clocksource/time-efm32.c index 0c03d4c9ba90..78664dc12bb4 100644 --- a/drivers/clocksource/time-efm32.c +++ b/drivers/clocksource/time-efm32.c @@ -229,12 +229,12 @@ static int __init efm32_clockevent_init(struct device_node *np) clock_event_ddata.base = base; clock_event_ddata.periodic_top = DIV_ROUND_CLOSEST(rate, 1024 * HZ); - setup_irq(irq, &efm32_clock_event_irq); - clockevents_config_and_register(&clock_event_ddata.evtdev, DIV_ROUND_CLOSEST(rate, 1024), 0xf, 0xffff); + setup_irq(irq, &efm32_clock_event_irq); + return 0; err_get_irq: diff --git a/drivers/clocksource/timer-sun5i.c b/drivers/clocksource/timer-sun5i.c index 7756191a7ba4..0850b325889f 100644 --- a/drivers/clocksource/timer-sun5i.c +++ b/drivers/clocksource/timer-sun5i.c @@ -176,10 +176,6 @@ static void __init sun5i_timer_init(struct device_node *node) ticks_per_jiffy = DIV_ROUND_UP(rate, HZ); - ret = setup_irq(irq, &sun5i_timer_irq); - if (ret) - pr_warn("failed to setup irq %d\n", irq); - /* Enable timer0 interrupt */ val = readl(timer_base + TIMER_IRQ_EN_REG); writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG); @@ -189,6 +185,10 @@ static void __init sun5i_timer_init(struct device_node *node) clockevents_config_and_register(&sun5i_clockevent, rate, TIMER_SYNC_TICKS, 0xffffffff); + + ret = setup_irq(irq, &sun5i_timer_irq); + if (ret) + pr_warn("failed to setup irq %d\n", irq); } CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer", sun5i_timer_init); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index a55e68f2cfc8..e3d2052e7552 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -252,9 +252,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) if (!dev->registered) return -EINVAL; - if (!dev->state_count) - dev->state_count = drv->state_count; - ret = cpuidle_add_device_sysfs(dev); if (ret) return ret; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index e918b6d0caf7..dcaae4c8bc08 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -398,7 +398,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ - for (i = 0; i < device->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); if (!kobj) goto error_state; @@ -430,9 +430,10 @@ error_state: */ static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) { + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); int i; - for (i = 0; i < device->state_count; i++) + for (i = 0; i < drv->state_count; i++) cpuidle_free_state_kobj(device, i); } diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c index 453822cc4f9d..fe8b0c991518 100644 --- a/drivers/dma/dw/platform.c +++ b/drivers/dma/dw/platform.c @@ -48,6 +48,8 @@ static bool dw_dma_of_filter(struct dma_chan *chan, void *param) return true; } +#define DRV_NAME "dw_dmac" + static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec, struct of_dma *ofdma) { @@ -293,7 +295,7 @@ static struct platform_driver dw_driver = { .remove = dw_remove, .shutdown = dw_shutdown, .driver = { - .name = "dw_dmac", + .name = DRV_NAME, .pm = &dw_dev_pm_ops, .of_match_table = of_match_ptr(dw_dma_of_id_table), .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table), @@ -314,3 +316,4 @@ module_exit(dw_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver"); +MODULE_ALIAS("platform:" DRV_NAME); diff --git a/drivers/dma/omap-dma.c b/drivers/dma/omap-dma.c index 362e7c49f2e1..12f82942e347 100644 --- a/drivers/dma/omap-dma.c +++ b/drivers/dma/omap-dma.c @@ -487,6 +487,7 @@ static int omap_dma_terminate_all(struct omap_chan *c) * c->desc is NULL and exit.) */ if (c->desc) { + omap_dma_desc_free(&c->desc->vd); c->desc = NULL; /* Avoid stopping the dma twice */ if (!c->paused) diff --git a/drivers/gpio/gpio-omap.c b/drivers/gpio/gpio-omap.c index 424319061e09..0f057af22fb1 100644 --- a/drivers/gpio/gpio-omap.c +++ b/drivers/gpio/gpio-omap.c @@ -59,7 +59,7 @@ struct gpio_bank { u32 saved_datain; u32 level_mask; u32 toggle_mask; - spinlock_t lock; + raw_spinlock_t lock; struct gpio_chip chip; struct clk *dbck; u32 mod_usage; @@ -503,14 +503,14 @@ static int gpio_irq_type(struct irq_data *d, unsigned type) (type & (IRQ_TYPE_LEVEL_LOW|IRQ_TYPE_LEVEL_HIGH))) return -EINVAL; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); offset = GPIO_INDEX(bank, gpio); retval = _set_gpio_triggering(bank, offset, type); if (!LINE_USED(bank->mod_usage, offset)) { _enable_gpio_module(bank, offset); _set_gpio_direction(bank, offset, 1); } else if (!gpio_is_input(bank, 1 << offset)) { - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return -EINVAL; } @@ -518,12 +518,12 @@ static int gpio_irq_type(struct irq_data *d, unsigned type) if (retval) { dev_err(bank->dev, "unable to lock offset %d for IRQ\n", offset); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return retval; } bank->irq_usage |= 1 << GPIO_INDEX(bank, gpio); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); if (type & (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH)) __irq_set_handler_locked(d->irq, handle_level_irq); @@ -640,14 +640,14 @@ static int _set_gpio_wakeup(struct gpio_bank *bank, int gpio, int enable) return -EINVAL; } - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); if (enable) bank->context.wake_en |= gpio_bit; else bank->context.wake_en &= ~gpio_bit; writel_relaxed(bank->context.wake_en, bank->base + bank->regs->wkup_en); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -682,7 +682,7 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset) if (!BANK_USED(bank)) pm_runtime_get_sync(bank->dev); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); /* Set trigger to none. You need to enable the desired trigger with * request_irq() or set_irq_type(). Only do this if the IRQ line has * not already been requested. @@ -692,7 +692,7 @@ static int omap_gpio_request(struct gpio_chip *chip, unsigned offset) _enable_gpio_module(bank, offset); } bank->mod_usage |= 1 << offset; - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -702,11 +702,11 @@ static void omap_gpio_free(struct gpio_chip *chip, unsigned offset) struct gpio_bank *bank = container_of(chip, struct gpio_bank, chip); unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->mod_usage &= ~(1 << offset); _disable_gpio_module(bank, offset); _reset_gpio(bank, bank->chip.base + offset); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); /* * If this is the last gpio to be freed in the bank, @@ -804,12 +804,12 @@ static void gpio_irq_shutdown(struct irq_data *d) unsigned long flags; unsigned offset = GPIO_INDEX(bank, gpio); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); gpio_unlock_as_irq(&bank->chip, offset); bank->irq_usage &= ~(1 << offset); _disable_gpio_module(bank, offset); _reset_gpio(bank, gpio); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); /* * If this is the last IRQ to be freed in the bank, @@ -833,10 +833,10 @@ static void gpio_mask_irq(struct irq_data *d) unsigned int gpio = irq_to_gpio(bank, d->hwirq); unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); _set_gpio_irqenable(bank, gpio, 0); _set_gpio_triggering(bank, GPIO_INDEX(bank, gpio), IRQ_TYPE_NONE); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); } static void gpio_unmask_irq(struct irq_data *d) @@ -847,7 +847,7 @@ static void gpio_unmask_irq(struct irq_data *d) u32 trigger = irqd_get_trigger_type(d); unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); if (trigger) _set_gpio_triggering(bank, GPIO_INDEX(bank, gpio), trigger); @@ -859,7 +859,7 @@ static void gpio_unmask_irq(struct irq_data *d) } _set_gpio_irqenable(bank, gpio, 1); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); } static struct irq_chip gpio_irq_chip = { @@ -882,9 +882,9 @@ static int omap_mpuio_suspend_noirq(struct device *dev) OMAP_MPUIO_GPIO_MASKIT / bank->stride; unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); writel_relaxed(0xffff & ~bank->context.wake_en, mask_reg); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -897,9 +897,9 @@ static int omap_mpuio_resume_noirq(struct device *dev) OMAP_MPUIO_GPIO_MASKIT / bank->stride; unsigned long flags; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); writel_relaxed(bank->context.wake_en, mask_reg); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -942,9 +942,9 @@ static int gpio_input(struct gpio_chip *chip, unsigned offset) unsigned long flags; bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); _set_gpio_direction(bank, offset, 1); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -968,10 +968,10 @@ static int gpio_output(struct gpio_chip *chip, unsigned offset, int value) unsigned long flags; bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->set_dataout(bank, offset, value); _set_gpio_direction(bank, offset, 0); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -983,9 +983,9 @@ static int gpio_debounce(struct gpio_chip *chip, unsigned offset, bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); _set_gpio_debounce(bank, offset, debounce); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -996,9 +996,9 @@ static void gpio_set(struct gpio_chip *chip, unsigned offset, int value) unsigned long flags; bank = container_of(chip, struct gpio_bank, chip); - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); bank->set_dataout(bank, offset, value); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); } /*---------------------------------------------------------------------*/ @@ -1210,7 +1210,7 @@ static int omap_gpio_probe(struct platform_device *pdev) else bank->set_dataout = _set_gpio_dataout_mask; - spin_lock_init(&bank->lock); + raw_spin_lock_init(&bank->lock); /* Static mapping, never released */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1267,7 +1267,7 @@ static int omap_gpio_runtime_suspend(struct device *dev) unsigned long flags; u32 wake_low, wake_hi; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); /* * Only edges can generate a wakeup event to the PRCM. @@ -1320,7 +1320,7 @@ update_gpio_context_count: bank->get_context_loss_count(bank->dev); _gpio_dbck_disable(bank); - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -1335,7 +1335,7 @@ static int omap_gpio_runtime_resume(struct device *dev) unsigned long flags; int c; - spin_lock_irqsave(&bank->lock, flags); + raw_spin_lock_irqsave(&bank->lock, flags); /* * On the first resume during the probe, the context has not @@ -1371,14 +1371,14 @@ static int omap_gpio_runtime_resume(struct device *dev) if (c != bank->context_loss_count) { omap_gpio_restore_context(bank); } else { - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } } } if (!bank->workaround_enabled) { - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } @@ -1433,7 +1433,7 @@ static int omap_gpio_runtime_resume(struct device *dev) } bank->workaround_enabled = false; - spin_unlock_irqrestore(&bank->lock, flags); + raw_spin_unlock_irqrestore(&bank->lock, flags); return 0; } diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 0cca5f24196a..663394f0c166 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -1306,6 +1306,9 @@ static int dce4_crtc_do_set_base(struct drm_crtc *crtc, (x << 16) | y); viewport_w = crtc->mode.hdisplay; viewport_h = (crtc->mode.vdisplay + 1) & ~1; + if ((rdev->family >= CHIP_BONAIRE) && + (crtc->mode.flags & DRM_MODE_FLAG_INTERLACE)) + viewport_h *= 2; WREG32(EVERGREEN_VIEWPORT_SIZE + radeon_crtc->crtc_offset, (viewport_w << 16) | viewport_h); diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index f0ed0baddf70..c3664bc05acf 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7069,6 +7069,9 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(DC_HPD5_INT_CONTROL, hpd5); WREG32(DC_HPD6_INT_CONTROL, hpd6); + /* posting read */ + RREG32(SRBM_STATUS); + return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 7138f3e31b7c..ee9f0b4e90d6 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4596,6 +4596,9 @@ int evergreen_irq_set(struct radeon_device *rdev) WREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, afmt5); WREG32(AFMT_AUDIO_PACKET_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, afmt6); + /* posting read */ + RREG32(SRBM_STATUS); + return 0; } diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 07620e198a6d..e28de20d469a 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -742,6 +742,10 @@ int r100_irq_set(struct radeon_device *rdev) tmp |= RADEON_FP2_DETECT_MASK; } WREG32(RADEON_GEN_INT_CNTL, tmp); + + /* read back to post the write */ + RREG32(RADEON_GEN_INT_CNTL); + return 0; } diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 788f602e8989..74a8a38db4d5 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3647,6 +3647,9 @@ int r600_irq_set(struct radeon_device *rdev) WREG32(RV770_CG_THERMAL_INT, thermal_int); } + /* posting read */ + RREG32(R_000E50_SRBM_STATUS); + return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 9ab30976287d..c43335ce8778 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -76,7 +76,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev) static bool radeon_read_bios(struct radeon_device *rdev) { - uint8_t __iomem *bios; + uint8_t __iomem *bios, val1, val2; size_t size; rdev->bios = NULL; @@ -86,15 +86,19 @@ static bool radeon_read_bios(struct radeon_device *rdev) return false; } - if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) { + val1 = readb(&bios[0]); + val2 = readb(&bios[1]); + + if (size == 0 || val1 != 0x55 || val2 != 0xaa) { pci_unmap_rom(rdev->pdev, bios); return false; } - rdev->bios = kmemdup(bios, size, GFP_KERNEL); + rdev->bios = kzalloc(size, GFP_KERNEL); if (rdev->bios == NULL) { pci_unmap_rom(rdev->pdev, bios); return false; } + memcpy_fromio(rdev->bios, bios, size); pci_unmap_rom(rdev->pdev, bios); return true; } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 7f2d6c0d11c1..2f2d2ce34709 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -179,11 +179,13 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) u32 ring = RADEON_CS_RING_GFX; s32 priority = 0; + INIT_LIST_HEAD(&p->validated); + if (!cs->num_chunks) { return 0; } + /* get chunks */ - INIT_LIST_HEAD(&p->validated); p->idx = 0; p->ib.sa_bo = NULL; p->ib.semaphore = NULL; diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index e5619d5e2a30..4261b3865cb0 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -700,6 +700,10 @@ int rs600_irq_set(struct radeon_device *rdev) WREG32(R_007D18_DC_HOT_PLUG_DETECT2_INT_CONTROL, hpd2); if (ASIC_IS_DCE2(rdev)) WREG32(R_007408_HDMI0_AUDIO_PACKET_CONTROL, hdmi0); + + /* posting read */ + RREG32(R_000040_GEN_INT_CNTL); + return 0; } diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 52b64ad285d6..2f2deccb3b78 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -5958,6 +5958,9 @@ int si_irq_set(struct radeon_device *rdev) WREG32(CG_THERMAL_INT, thermal_int); + /* posting read */ + RREG32(SRBM_STATUS); + return 0; } @@ -6875,8 +6878,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK); if (!vclk || !dclk) { - /* keep the Bypass mode, put PLL to sleep */ - WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + /* keep the Bypass mode */ return 0; } @@ -6892,8 +6894,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk) /* set VCO_MODE to 1 */ WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK); - /* toggle UPLL_SLEEP to 1 then back to 0 */ - WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK); + /* disable sleep mode */ WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK); /* deassert UPLL_RESET */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index fb7c36e93fd4..0771dcbf9ed0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -733,32 +733,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) goto out_err1; } - ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM, - (dev_priv->vram_size >> PAGE_SHIFT)); - if (unlikely(ret != 0)) { - DRM_ERROR("Failed initializing memory manager for VRAM.\n"); - goto out_err2; - } - - dev_priv->has_gmr = true; - if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) || - refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR, - VMW_PL_GMR) != 0) { - DRM_INFO("No GMR memory available. " - "Graphics memory resources are very limited.\n"); - dev_priv->has_gmr = false; - } - - if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) { - dev_priv->has_mob = true; - if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_MOB, - VMW_PL_MOB) != 0) { - DRM_INFO("No MOB memory available. " - "3D will be disabled.\n"); - dev_priv->has_mob = false; - } - } - dev_priv->mmio_mtrr = arch_phys_wc_add(dev_priv->mmio_start, dev_priv->mmio_size); @@ -821,6 +795,33 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) goto out_no_fman; } + + ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM, + (dev_priv->vram_size >> PAGE_SHIFT)); + if (unlikely(ret != 0)) { + DRM_ERROR("Failed initializing memory manager for VRAM.\n"); + goto out_no_vram; + } + + dev_priv->has_gmr = true; + if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) || + refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR, + VMW_PL_GMR) != 0) { + DRM_INFO("No GMR memory available. " + "Graphics memory resources are very limited.\n"); + dev_priv->has_gmr = false; + } + + if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) { + dev_priv->has_mob = true; + if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_MOB, + VMW_PL_MOB) != 0) { + DRM_INFO("No MOB memory available. " + "3D will be disabled.\n"); + dev_priv->has_mob = false; + } + } + vmw_kms_save_vga(dev_priv); /* Start kms and overlay systems, needs fifo. */ @@ -846,6 +847,12 @@ out_no_fifo: vmw_kms_close(dev_priv); out_no_kms: vmw_kms_restore_vga(dev_priv); + if (dev_priv->has_mob) + (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); + if (dev_priv->has_gmr) + (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); + (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); +out_no_vram: vmw_fence_manager_takedown(dev_priv->fman); out_no_fman: if (dev_priv->capabilities & SVGA_CAP_IRQMASK) @@ -861,12 +868,6 @@ out_err4: iounmap(dev_priv->mmio_virt); out_err3: arch_phys_wc_del(dev_priv->mmio_mtrr); - if (dev_priv->has_mob) - (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); - if (dev_priv->has_gmr) - (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); - (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); -out_err2: (void)ttm_bo_device_release(&dev_priv->bdev); out_err1: vmw_ttm_global_release(dev_priv); @@ -896,6 +897,13 @@ static int vmw_driver_unload(struct drm_device *dev) } vmw_kms_close(dev_priv); vmw_overlay_close(dev_priv); + + if (dev_priv->has_mob) + (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); + if (dev_priv->has_gmr) + (void)ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); + (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); + vmw_fence_manager_takedown(dev_priv->fman); if (dev_priv->capabilities & SVGA_CAP_IRQMASK) drm_irq_uninstall(dev_priv->dev); @@ -907,11 +915,6 @@ static int vmw_driver_unload(struct drm_device *dev) ttm_object_device_release(&dev_priv->tdev); iounmap(dev_priv->mmio_virt); arch_phys_wc_del(dev_priv->mmio_mtrr); - if (dev_priv->has_mob) - (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB); - if (dev_priv->has_gmr) - (void)ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR); - (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM); (void)ttm_bo_device_release(&dev_priv->bdev); vmw_ttm_global_release(dev_priv); diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index e0017c22bb9c..f53e9a803a0e 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -60,7 +60,7 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) iio_trigger_set_drvdata(adis->trig, adis); ret = iio_trigger_register(adis->trig); - indio_dev->trig = adis->trig; + indio_dev->trig = iio_trigger_get(adis->trig); if (ret) goto error_free_irq; diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c index 429517117eff..30fce6723e61 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c @@ -25,6 +25,16 @@ #include <linux/poll.h> #include "inv_mpu_iio.h" +static void inv_clear_kfifo(struct inv_mpu6050_state *st) +{ + unsigned long flags; + + /* take the spin lock sem to avoid interrupt kick in */ + spin_lock_irqsave(&st->time_stamp_lock, flags); + kfifo_reset(&st->timestamps); + spin_unlock_irqrestore(&st->time_stamp_lock, flags); +} + int inv_reset_fifo(struct iio_dev *indio_dev) { int result; @@ -51,6 +61,10 @@ int inv_reset_fifo(struct iio_dev *indio_dev) INV_MPU6050_BIT_FIFO_RST); if (result) goto reset_fifo_fail; + + /* clear timestamps fifo */ + inv_clear_kfifo(st); + /* enable interrupt */ if (st->chip_config.accl_fifo_enable || st->chip_config.gyro_fifo_enable) { @@ -84,16 +98,6 @@ reset_fifo_fail: return result; } -static void inv_clear_kfifo(struct inv_mpu6050_state *st) -{ - unsigned long flags; - - /* take the spin lock sem to avoid interrupt kick in */ - spin_lock_irqsave(&st->time_stamp_lock, flags); - kfifo_reset(&st->timestamps); - spin_unlock_irqrestore(&st->time_stamp_lock, flags); -} - /** * inv_mpu6050_irq_handler() - Cache a timestamp at each data ready interrupt. */ @@ -185,7 +189,6 @@ end_session: flush_fifo: /* Flush HW and SW FIFOs. */ inv_reset_fifo(indio_dev); - inv_clear_kfifo(st); mutex_unlock(&indio_dev->mlock); iio_trigger_notify_done(indio_dev->trig); diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index a84112322071..055ebebc07dd 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -94,6 +94,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (dmasync) dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs); + /* + * If the combination of the addr and size requested for this memory + * region causes an integer overflow, return error. + */ + if ((PAGE_ALIGN(addr + size) <= size) || + (PAGE_ALIGN(addr + size) <= addr)) + return ERR_PTR(-EINVAL); + if (!can_do_mlock()) return ERR_PTR(-EPERM); diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 08219fb3338b..7a515c867674 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -476,6 +476,7 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file, entry->desc.async.element = element; entry->desc.async.event_type = event; + entry->desc.async.reserved = 0; entry->counter = counter; list_add_tail(&entry->list, &file->async_file->event_list); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f2a3f48107e7..2592ab5f21b1 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -64,6 +64,14 @@ enum { #define GUID_TBL_BLK_NUM_ENTRIES 8 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES) +/* Counters should be saturate once they reach their maximum value */ +#define ASSIGN_32BIT_COUNTER(counter, value) do {\ + if ((value) > U32_MAX) \ + counter = cpu_to_be32(U32_MAX); \ + else \ + counter = cpu_to_be32(value); \ +} while (0) + struct mlx4_mad_rcv_buf { struct ib_grh grh; u8 payload[256]; @@ -730,10 +738,14 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, static void edit_counter(struct mlx4_counter *cnt, struct ib_pma_portcounters *pma_cnt) { - pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2)); - pma_cnt->port_rcv_data = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2)); - pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames)); - pma_cnt->port_rcv_packets = cpu_to_be32(be64_to_cpu(cnt->rx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data, + (be64_to_cpu(cnt->tx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data, + (be64_to_cpu(cnt->rx_bytes) >> 2)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets, + be64_to_cpu(cnt->tx_frames)); + ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets, + be64_to_cpu(cnt->rx_frames)); } static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index d2a8d64f8526..080e767250d3 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -291,9 +291,16 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, struct request_queue *q = bdev_get_queue(where->bdev); unsigned short logical_block_size = queue_logical_block_size(q); sector_t num_sectors; + unsigned int uninitialized_var(special_cmd_max_sectors); - /* Reject unsupported discard requests */ - if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) { + /* + * Reject unsupported discard and write same requests. + */ + if (rw & REQ_DISCARD) + special_cmd_max_sectors = q->limits.max_discard_sectors; + else if (rw & REQ_WRITE_SAME) + special_cmd_max_sectors = q->limits.max_write_same_sectors; + if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) { dec_count(io, region, -EOPNOTSUPP); return; } @@ -319,7 +326,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; remaining -= num_sectors; } else if (rw & REQ_WRITE_SAME) { @@ -328,7 +335,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, */ dp->get_page(dp, &page, &len, &offset); bio_add_page(bio, page, logical_block_size, offset); - num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining); bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT; offset = 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ba00b135ee82..c4b9e575ce21 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2352,10 +2352,16 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); + /* + * Take suspend_lock so that presuspend and postsuspend methods + * do not race with internal suspend. + */ + mutex_lock(&md->suspend_lock); if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); } + mutex_unlock(&md->suspend_lock); /* dm_put_live_table must be before msleep, otherwise deadlock is possible */ dm_put_live_table(md, srcu_idx); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h index f723f1f2f578..ab851278d9d0 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc_common.h +++ b/drivers/media/platform/s5p-mfc/s5p_mfc_common.h @@ -30,7 +30,7 @@ /* Offset base used to differentiate between CAPTURE and OUTPUT * while mmaping */ -#define DST_QUEUE_OFF_BASE (TASK_SIZE / 2) +#define DST_QUEUE_OFF_BASE (1 << 30) #define MFC_BANK1_ALLOC_CTX 0 #define MFC_BANK2_ALLOC_CTX 1 diff --git a/drivers/media/platform/sh_veu.c b/drivers/media/platform/sh_veu.c index 744e43b480bc..f698e322a1cd 100644 --- a/drivers/media/platform/sh_veu.c +++ b/drivers/media/platform/sh_veu.c @@ -1183,6 +1183,7 @@ static int sh_veu_probe(struct platform_device *pdev) } *vdev = sh_veu_videodev; + vdev->v4l2_dev = &veu->v4l2_dev; spin_lock_init(&veu->lock); mutex_init(&veu->fop_lock); vdev->lock = &veu->fop_lock; diff --git a/drivers/mfd/kempld-core.c b/drivers/mfd/kempld-core.c index 38917a822335..2df3cbc968d1 100644 --- a/drivers/mfd/kempld-core.c +++ b/drivers/mfd/kempld-core.c @@ -629,7 +629,7 @@ static int __init kempld_init(void) if (force_device_id[0]) { for (id = kempld_dmi_table; id->matches[0].slot != DMI_NONE; id++) if (strstr(id->ident, force_device_id)) - if (id->callback && id->callback(id)) + if (id->callback && !id->callback(id)) break; if (id->matches[0].slot == DMI_NONE) return -ENODEV; diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c index 51e15fd53108..d058b00ba218 100644 --- a/drivers/mtd/nand/pxa3xx_nand.c +++ b/drivers/mtd/nand/pxa3xx_nand.c @@ -481,6 +481,42 @@ static void disable_int(struct pxa3xx_nand_info *info, uint32_t int_mask) nand_writel(info, NDCR, ndcr | int_mask); } +static void drain_fifo(struct pxa3xx_nand_info *info, void *data, int len) +{ + if (info->ecc_bch) { + int timeout; + + /* + * According to the datasheet, when reading from NDDB + * with BCH enabled, after each 32 bytes reads, we + * have to make sure that the NDSR.RDDREQ bit is set. + * + * Drain the FIFO 8 32 bits reads at a time, and skip + * the polling on the last read. + */ + while (len > 8) { + __raw_readsl(info->mmio_base + NDDB, data, 8); + + for (timeout = 0; + !(nand_readl(info, NDSR) & NDSR_RDDREQ); + timeout++) { + if (timeout >= 5) { + dev_err(&info->pdev->dev, + "Timeout on RDDREQ while draining the FIFO\n"); + return; + } + + mdelay(1); + } + + data += 32; + len -= 8; + } + } + + __raw_readsl(info->mmio_base + NDDB, data, len); +} + static void handle_data_pio(struct pxa3xx_nand_info *info) { unsigned int do_bytes = min(info->data_size, info->chunk_size); @@ -497,14 +533,14 @@ static void handle_data_pio(struct pxa3xx_nand_info *info) DIV_ROUND_UP(info->oob_size, 4)); break; case STATE_PIO_READING: - __raw_readsl(info->mmio_base + NDDB, - info->data_buff + info->data_buff_pos, - DIV_ROUND_UP(do_bytes, 4)); + drain_fifo(info, + info->data_buff + info->data_buff_pos, + DIV_ROUND_UP(do_bytes, 4)); if (info->oob_size > 0) - __raw_readsl(info->mmio_base + NDDB, - info->oob_buff + info->oob_buff_pos, - DIV_ROUND_UP(info->oob_size, 4)); + drain_fifo(info, + info->oob_buff + info->oob_buff_pos, + DIV_ROUND_UP(info->oob_size, 4)); break; default: dev_err(&info->pdev->dev, "%s: invalid state %d\n", __func__, diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 1468c4658804..84ad2b44377c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -502,6 +502,14 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf) skb->pkt_type = PACKET_BROADCAST; skb->ip_summed = CHECKSUM_UNNECESSARY; + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 61376abdab39..dbd8d21a8f10 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -1095,12 +1095,19 @@ static int flexcan_probe(struct platform_device *pdev) const struct flexcan_devtype_data *devtype_data; struct net_device *dev; struct flexcan_priv *priv; + struct regulator *reg_xceiver; struct resource *mem; struct clk *clk_ipg = NULL, *clk_per = NULL; void __iomem *base; int err, irq; u32 clock_freq = 0; + reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver"); + if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER) + return -EPROBE_DEFER; + else if (IS_ERR(reg_xceiver)) + reg_xceiver = NULL; + if (pdev->dev.of_node) of_property_read_u32(pdev->dev.of_node, "clock-frequency", &clock_freq); @@ -1162,9 +1169,7 @@ static int flexcan_probe(struct platform_device *pdev) priv->pdata = dev_get_platdata(&pdev->dev); priv->devtype_data = devtype_data; - priv->reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver"); - if (IS_ERR(priv->reg_xceiver)) - priv->reg_xceiver = NULL; + priv->reg_xceiver = reg_xceiver; netif_napi_add(dev, &priv->napi, flexcan_poll, FLEXCAN_NAPI_WEIGHT); diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c index 9339cccfe05a..ad0e71c7a607 100644 --- a/drivers/net/ethernet/amd/pcnet32.c +++ b/drivers/net/ethernet/amd/pcnet32.c @@ -1516,7 +1516,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) { struct pcnet32_private *lp; int i, media; - int fdx, mii, fset, dxsuflo; + int fdx, mii, fset, dxsuflo, sram; int chip_version; char *chipname; struct net_device *dev; @@ -1553,7 +1553,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) } /* initialize variables */ - fdx = mii = fset = dxsuflo = 0; + fdx = mii = fset = dxsuflo = sram = 0; chip_version = (chip_version >> 12) & 0xffff; switch (chip_version) { @@ -1586,6 +1586,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C973"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2626: chipname = "PCnet/Home 79C978"; /* PCI */ @@ -1609,6 +1610,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) chipname = "PCnet/FAST III 79C975"; /* PCI */ fdx = 1; mii = 1; + sram = 1; break; case 0x2628: chipname = "PCnet/PRO 79C976"; @@ -1637,6 +1639,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) dxsuflo = 1; } + /* + * The Am79C973/Am79C975 controllers come with 12K of SRAM + * which we can use for the Tx/Rx buffers but most importantly, + * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid + * Tx fifo underflows. + */ + if (sram) { + /* + * The SRAM is being configured in two steps. First we + * set the SRAM size in the BCR25:SRAM_SIZE bits. According + * to the datasheet, each bit corresponds to a 512-byte + * page so we can have at most 24 pages. The SRAM_SIZE + * holds the value of the upper 8 bits of the 16-bit SRAM size. + * The low 8-bits start at 0x00 and end at 0xff. So the + * address range is from 0x0000 up to 0x17ff. Therefore, + * the SRAM_SIZE is set to 0x17. The next step is to set + * the BCR26:SRAM_BND midway through so the Tx and Rx + * buffers can share the SRAM equally. + */ + a->write_bcr(ioaddr, 25, 0x17); + a->write_bcr(ioaddr, 26, 0xc); + /* And finally enable the NOUFLO bit */ + a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11)); + } + dev = alloc_etherdev(sizeof(*lp)); if (!dev) { ret = -ENOMEM; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 7d4382286457..242874041ba4 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12395,6 +12395,9 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS, PCICFG_VENDOR_ID_OFFSET); + /* Set PCIe reset type to fundamental for EEH recovery */ + pdev->needs_freset = 1; + /* AER (Advanced Error reporting) configuration */ rc = pci_enable_pcie_error_reporting(pdev); if (!rc) diff --git a/drivers/net/usb/cx82310_eth.c b/drivers/net/usb/cx82310_eth.c index 3eed708a6182..fe48f4c51373 100644 --- a/drivers/net/usb/cx82310_eth.c +++ b/drivers/net/usb/cx82310_eth.c @@ -300,9 +300,18 @@ static const struct driver_info cx82310_info = { .tx_fixup = cx82310_tx_fixup, }; +#define USB_DEVICE_CLASS(vend, prod, cl, sc, pr) \ + .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \ + USB_DEVICE_ID_MATCH_DEV_INFO, \ + .idVendor = (vend), \ + .idProduct = (prod), \ + .bDeviceClass = (cl), \ + .bDeviceSubClass = (sc), \ + .bDeviceProtocol = (pr) + static const struct usb_device_id products[] = { { - USB_DEVICE_AND_INTERFACE_INFO(0x0572, 0xcb01, 0xff, 0, 0), + USB_DEVICE_CLASS(0x0572, 0xcb01, 0xff, 0, 0), .driver_info = (unsigned long) &cx82310_info }, { }, diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h index 3441f70d0ff9..6e8cdb8a0cc5 100644 --- a/drivers/net/wireless/iwlwifi/dvm/dev.h +++ b/drivers/net/wireless/iwlwifi/dvm/dev.h @@ -708,7 +708,6 @@ struct iwl_priv { unsigned long reload_jiffies; int reload_count; bool ucode_loaded; - bool init_ucode_run; /* Don't run init uCode again */ u8 plcp_delta_threshold; diff --git a/drivers/net/wireless/iwlwifi/dvm/ucode.c b/drivers/net/wireless/iwlwifi/dvm/ucode.c index cf03ef5619d9..8b2dedc30159 100644 --- a/drivers/net/wireless/iwlwifi/dvm/ucode.c +++ b/drivers/net/wireless/iwlwifi/dvm/ucode.c @@ -418,9 +418,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv) if (!priv->fw->img[IWL_UCODE_INIT].sec[0].len) return 0; - if (priv->init_ucode_run) - return 0; - iwl_init_notification_wait(&priv->notif_wait, &calib_wait, calib_complete, ARRAY_SIZE(calib_complete), iwlagn_wait_calib, priv); @@ -440,8 +437,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv) */ ret = iwl_wait_notification(&priv->notif_wait, &calib_wait, UCODE_CALIB_TIMEOUT); - if (!ret) - priv->init_ucode_run = true; goto out; diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index a04174607e97..4a3b8b72c8af 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -594,7 +594,7 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, ret = iwl_mvm_mac_ctxt_add(mvm, vif); if (ret) - goto out_remove_mac; + goto out_release; iwl_mvm_power_disable(mvm, vif); diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 48f20ff1add9..bbff99dcbaea 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -290,7 +290,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *intspec, *tmp, *addr; u32 intsize, intlen; - int i, res = -EINVAL; + int i, res; pr_debug("of_irq_parse_one: dev=%s, index=%d\n", of_node_full_name(device), index); @@ -323,15 +323,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Get size of interrupt specifier */ tmp = of_get_property(p, "#interrupt-cells", NULL); - if (tmp == NULL) + if (tmp == NULL) { + res = -EINVAL; goto out; + } intsize = be32_to_cpu(*tmp); pr_debug(" intsize=%d intlen=%d\n", intsize, intlen); /* Check index */ - if ((index + 1) * intsize > intlen) + if ((index + 1) * intsize > intlen) { + res = -EINVAL; goto out; + } /* Copy intspec into irq structure */ intspec += index * intsize; diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 34ff7026440c..5d3b45640181 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -127,16 +127,8 @@ static const char *aer_agent_string[] = { static void __print_tlp_header(struct pci_dev *dev, struct aer_header_log_regs *t) { - unsigned char *tlp = (unsigned char *)&t; - - dev_err(&dev->dev, " TLP Header:" - " %02x%02x%02x%02x %02x%02x%02x%02x" - " %02x%02x%02x%02x %02x%02x%02x%02x\n", - *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp, - *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4), - *(tlp + 11), *(tlp + 10), *(tlp + 9), - *(tlp + 8), *(tlp + 15), *(tlp + 14), - *(tlp + 13), *(tlp + 12)); + dev_err(&dev->dev, " TLP Header: %08x %08x %08x %08x\n", + t->dw0, t->dw1, t->dw2, t->dw3); } static void __aer_print_error(struct pci_dev *dev, diff --git a/drivers/phy/phy-core.c b/drivers/phy/phy-core.c index 34d56f7864d6..86592dd24da5 100644 --- a/drivers/phy/phy-core.c +++ b/drivers/phy/phy-core.c @@ -50,7 +50,9 @@ static void devm_phy_consume(struct device *dev, void *res) static int devm_phy_match(struct device *dev, void *res, void *match_data) { - return res == match_data; + struct phy **phy = res; + + return *phy == match_data; } static struct phy *phy_lookup(struct device *device, const char *port) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index b7984044232d..5d8d2dcd975e 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1764,10 +1764,12 @@ static int _regulator_do_enable(struct regulator_dev *rdev) trace_regulator_enable(rdev_get_name(rdev)); if (rdev->ena_pin) { - ret = regulator_ena_gpio_ctrl(rdev, true); - if (ret < 0) - return ret; - rdev->ena_gpio_state = 1; + if (!rdev->ena_gpio_state) { + ret = regulator_ena_gpio_ctrl(rdev, true); + if (ret < 0) + return ret; + rdev->ena_gpio_state = 1; + } } else if (rdev->desc->ops->enable) { ret = rdev->desc->ops->enable(rdev); if (ret < 0) @@ -1897,10 +1899,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev) trace_regulator_disable(rdev_get_name(rdev)); if (rdev->ena_pin) { - ret = regulator_ena_gpio_ctrl(rdev, false); - if (ret < 0) - return ret; - rdev->ena_gpio_state = 0; + if (rdev->ena_gpio_state) { + ret = regulator_ena_gpio_ctrl(rdev, false); + if (ret < 0) + return ret; + rdev->ena_gpio_state = 0; + } } else if (rdev->desc->ops->disable) { ret = rdev->desc->ops->disable(rdev); @@ -3454,12 +3458,6 @@ regulator_register(const struct regulator_desc *regulator_desc, config->ena_gpio, ret); goto wash; } - - if (config->ena_gpio_flags & GPIOF_OUT_INIT_HIGH) - rdev->ena_gpio_state = 1; - - if (config->ena_gpio_invert) - rdev->ena_gpio_state = !rdev->ena_gpio_state; } /* set regulator constraints */ @@ -3631,9 +3629,11 @@ int regulator_suspend_finish(void) list_for_each_entry(rdev, ®ulator_list, list) { mutex_lock(&rdev->mutex); if (rdev->use_count > 0 || rdev->constraints->always_on) { - error = _regulator_do_enable(rdev); - if (error) - ret = error; + if (!_regulator_is_enabled(rdev)) { + error = _regulator_do_enable(rdev); + if (error) + ret = error; + } } else { if (!have_full_constraints()) goto unlock; diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 953bd0bfdf0d..19ddd43a00cf 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5684,9 +5684,9 @@ free_port: hba_free: if (phba->msix_enabled) pci_disable_msix(phba->pcidev); - iscsi_host_remove(phba->shost); pci_dev_put(phba->pcidev); iscsi_host_free(phba->shost); + pci_set_drvdata(pcidev, NULL); disable_pci: pci_disable_device(pcidev); return ret; diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c index d2895836f9fa..c85e07ab51e4 100644 --- a/drivers/scsi/libsas/sas_ata.c +++ b/drivers/scsi/libsas/sas_ata.c @@ -191,7 +191,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) /* TODO: audit callers to ensure they are ready for qc_issue to * unconditionally re-enable interrupts */ - local_irq_save(flags); + local_irq_save_nort(flags); spin_unlock(ap->lock); /* If the device fell off, no sense in issuing commands */ @@ -261,7 +261,7 @@ static unsigned int sas_ata_qc_issue(struct ata_queued_cmd *qc) out: spin_lock(ap->lock); - local_irq_restore(flags); + local_irq_restore_nort(flags); return ret; } diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 62b58d38ce2e..60de66252fa2 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -500,6 +500,7 @@ static void sas_revalidate_domain(struct work_struct *work) struct sas_discovery_event *ev = to_sas_discovery_event(work); struct asd_sas_port *port = ev->port; struct sas_ha_struct *ha = port->ha; + struct domain_device *ddev = port->port_dev; /* prevent revalidation from finding sata links in recovery */ mutex_lock(&ha->disco_mutex); @@ -514,8 +515,9 @@ static void sas_revalidate_domain(struct work_struct *work) SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id, task_pid_nr(current)); - if (port->port_dev) - res = sas_ex_revalidate_domain(port->port_dev); + if (ddev && (ddev->dev_type == SAS_FANOUT_EXPANDER_DEVICE || + ddev->dev_type == SAS_EDGE_EXPANDER_DEVICE)) + res = sas_ex_revalidate_domain(ddev); SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n", port->id, task_pid_nr(current), res); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index 9d81f7693f99..1817f3f2b02d 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -1515,7 +1515,7 @@ static int tcm_qla2xxx_check_initiator_node_acl( /* * Finally register the new FC Nexus with TCM */ - __transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess); + transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess); return 0; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 64e487a8bf59..719bd8257520 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1258,9 +1258,11 @@ int scsi_prep_state_check(struct scsi_device *sdev, struct request *req) "rejecting I/O to dead device\n"); ret = BLKPREP_KILL; break; - case SDEV_QUIESCE: case SDEV_BLOCK: case SDEV_CREATED_BLOCK: + ret = BLKPREP_DEFER; + break; + case SDEV_QUIESCE: /* * If the devices is blocked we defer normal commands. */ diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 5d7b07f08326..5f8c6d2f4df7 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -781,17 +781,17 @@ static void atmel_spi_pdc_next_xfer(struct spi_master *master, (unsigned long long)xfer->rx_dma); } - /* REVISIT: We're waiting for ENDRX before we start the next + /* REVISIT: We're waiting for RXBUFF before we start the next * transfer because we need to handle some difficult timing - * issues otherwise. If we wait for ENDTX in one transfer and - * then starts waiting for ENDRX in the next, it's difficult - * to tell the difference between the ENDRX interrupt we're - * actually waiting for and the ENDRX interrupt of the + * issues otherwise. If we wait for TXBUFE in one transfer and + * then starts waiting for RXBUFF in the next, it's difficult + * to tell the difference between the RXBUFF interrupt we're + * actually waiting for and the RXBUFF interrupt of the * previous transfer. * * It should be doable, though. Just not now... */ - spi_writel(as, IER, SPI_BIT(ENDRX) | SPI_BIT(OVRES)); + spi_writel(as, IER, SPI_BIT(RXBUFF) | SPI_BIT(OVRES)); spi_writel(as, PTCR, SPI_BIT(TXTEN) | SPI_BIT(RXTEN)); } diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 971855e859c7..fe091a87fd6d 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -503,12 +503,12 @@ static void giveback(struct pl022 *pl022) pl022->cur_msg = NULL; pl022->cur_transfer = NULL; pl022->cur_chip = NULL; - spi_finalize_current_message(pl022->master); /* disable the SPI/SSP operation */ writew((readw(SSP_CR1(pl022->virtbase)) & (~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase)); + spi_finalize_current_message(pl022->master); } /** diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index fbf3b22efe5a..d6563ec700d4 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -846,13 +846,14 @@ void spi_finalize_current_message(struct spi_master *master) "failed to unprepare message: %d\n", ret); } } + + trace_spi_message_done(mesg); + master->cur_msg_prepared = false; mesg->state = NULL; if (mesg->complete) mesg->complete(mesg->context); - - trace_spi_message_done(mesg); } EXPORT_SYMBOL_GPL(spi_finalize_current_message); diff --git a/drivers/staging/iio/adc/mxs-lradc.c b/drivers/staging/iio/adc/mxs-lradc.c index be89260c23a6..27e1a6e62d06 100644 --- a/drivers/staging/iio/adc/mxs-lradc.c +++ b/drivers/staging/iio/adc/mxs-lradc.c @@ -1159,7 +1159,6 @@ static irqreturn_t mxs_lradc_handle_irq(int irq, void *data) LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2)); } - if (iio_buffer_enabled(iio)) if (iio_buffer_enabled(iio)) { if (reg & lradc->buffer_vchans) iio_trigger_poll(iio->trig, iio_get_time_ns()); diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h index eefdb8d061b1..81cc7a0134bb 100644 --- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h +++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h @@ -105,8 +105,8 @@ static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt, #define ll_vfs_unlink(inode,entry,mnt) vfs_unlink(inode,entry) #define ll_vfs_mknod(dir,entry,mnt,mode,dev) vfs_mknod(dir,entry,mode,dev) #define ll_security_inode_unlink(dir,entry,mnt) security_inode_unlink(dir,entry) -#define ll_vfs_rename(old,old_dir,mnt,new,new_dir,mnt1,delegated_inode) \ - vfs_rename(old,old_dir,new,new_dir,delegated_inode) +#define ll_vfs_rename(old, old_dir, mnt, new, new_dir, mnt1) \ + vfs_rename(old, old_dir, new, new_dir, NULL, 0) #define cfs_bio_io_error(a,b) bio_io_error((a)) #define cfs_bio_endio(a,b,c) bio_endio((a),(c)) diff --git a/drivers/staging/lustre/lustre/llite/llite_nfs.c b/drivers/staging/lustre/lustre/llite/llite_nfs.c index 1767c741fb72..ed35a88c6dd9 100644 --- a/drivers/staging/lustre/lustre/llite/llite_nfs.c +++ b/drivers/staging/lustre/lustre/llite/llite_nfs.c @@ -205,13 +205,15 @@ static int ll_encode_fh(struct inode *inode, __u32 *fh, int *plen, return LUSTRE_NFS_FID; } -static int ll_nfs_get_name_filldir(void *cookie, const char *name, int namelen, - loff_t hash, u64 ino, unsigned type) +static int ll_nfs_get_name_filldir(struct dir_context *ctx, const char *name, + int namelen, loff_t hash, u64 ino, + unsigned type) { /* It is hack to access lde_fid for comparison with lgd_fid. * So the input 'name' must be part of the 'lu_dirent'. */ struct lu_dirent *lde = container_of0(name, struct lu_dirent, lde_name); - struct ll_getname_data *lgd = cookie; + struct ll_getname_data *lgd = + container_of(ctx, struct ll_getname_data, ctx); struct lu_fid fid; fid_le_to_cpu(&fid, &lde->lde_fid); diff --git a/drivers/staging/lustre/lustre/llite/symlink.c b/drivers/staging/lustre/lustre/llite/symlink.c index ab06891f7fc7..80d48b5ae247 100644 --- a/drivers/staging/lustre/lustre/llite/symlink.c +++ b/drivers/staging/lustre/lustre/llite/symlink.c @@ -115,27 +115,6 @@ failed: return rc; } -static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) -{ - struct inode *inode = dentry->d_inode; - struct ptlrpc_request *request; - char *symname; - int rc; - - CDEBUG(D_VFSTRACE, "VFS Op\n"); - - ll_inode_size_lock(inode); - rc = ll_readlink_internal(inode, &request, &symname); - if (rc) - GOTO(out, rc); - - rc = vfs_readlink(dentry, buffer, buflen, symname); - out: - ptlrpc_req_finished(request); - ll_inode_size_unlock(inode); - return rc; -} - static void *ll_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; @@ -175,7 +154,7 @@ static void ll_put_link(struct dentry *dentry, struct nameidata *nd, void *cooki } struct inode_operations ll_fast_symlink_inode_operations = { - .readlink = ll_readlink, + .readlink = generic_readlink, .setattr = ll_setattr, .follow_link = ll_follow_link, .put_link = ll_put_link, diff --git a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c index 428ffd8c37b7..d50822be3230 100644 --- a/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c +++ b/drivers/staging/lustre/lustre/lvfs/lvfs_linux.c @@ -223,7 +223,7 @@ int lustre_rename(struct dentry *dir, struct vfsmount *mnt, GOTO(put_old, err = PTR_ERR(dchild_new)); err = ll_vfs_rename(dir->d_inode, dchild_old, mnt, - dir->d_inode, dchild_new, mnt, NULL); + dir->d_inode, dchild_new, mnt); dput(dchild_new); put_old: diff --git a/drivers/staging/rtl8821ae/Kconfig b/drivers/staging/rtl8821ae/Kconfig index abccc9dabd65..50a407f71717 100644 --- a/drivers/staging/rtl8821ae/Kconfig +++ b/drivers/staging/rtl8821ae/Kconfig @@ -2,6 +2,7 @@ config R8821AE tristate "RealTek RTL8821AE Wireless LAN NIC driver" depends on PCI && WLAN && MAC80211 depends on m + depends on BROKEN select WIRELESS_EXT select WEXT_PRIV select EEPROM_93CX6 diff --git a/drivers/staging/vt6655/rf.c b/drivers/staging/vt6655/rf.c index edb1b2768b17..dbd9d44919ac 100644 --- a/drivers/staging/vt6655/rf.c +++ b/drivers/staging/vt6655/rf.c @@ -936,6 +936,7 @@ bool RFbSetPower( break; case RATE_6M: case RATE_9M: + case RATE_12M: case RATE_18M: byPwr = pDevice->abyOFDMPwrTbl[uCH]; if (pDevice->byRFType == RF_UW2452) { diff --git a/drivers/staging/vt6656/rf.c b/drivers/staging/vt6656/rf.c index 1e8f64bff03c..2dc48d4ff433 100644 --- a/drivers/staging/vt6656/rf.c +++ b/drivers/staging/vt6656/rf.c @@ -752,6 +752,7 @@ int RFbSetPower(struct vnt_private *priv, u32 rate, u32 channel) break; case RATE_6M: case RATE_9M: + case RATE_12M: case RATE_18M: case RATE_24M: case RATE_36M: diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 104f29e6b290..b61c555a5a8f 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -1165,7 +1165,7 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, * traditional iSCSI block I/O. */ if (iscsit_allocate_iovecs(cmd) < 0) { - return iscsit_add_reject_cmd(cmd, + return iscsit_reject_cmd(cmd, ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf); } immed_data = cmd->immediate_data; @@ -4196,11 +4196,17 @@ int iscsit_close_connection( pr_debug("Closing iSCSI connection CID %hu on SID:" " %u\n", conn->cid, sess->sid); /* - * Always up conn_logout_comp just in case the RX Thread is sleeping - * and the logout response never got sent because the connection - * failed. + * Always up conn_logout_comp for the traditional TCP case just in case + * the RX Thread in iscsi_target_rx_opcode() is sleeping and the logout + * response never got sent because the connection failed. + * + * However for iser-target, isert_wait4logout() is using conn_logout_comp + * to signal logout response TX interrupt completion. Go ahead and skip + * this for iser since isert_rx_opcode() does not wait on logout failure, + * and to avoid iscsi_conn pointer dereference in iser-target code. */ - complete(&conn->conn_logout_comp); + if (conn->conn_transport->transport_type == ISCSI_TCP) + complete(&conn->conn_logout_comp); iscsi_release_thread_set(conn); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 26ae6886ac59..093b8cb85de7 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1591,8 +1591,6 @@ int target_configure_device(struct se_device *dev) ret = dev->transport->configure_device(dev); if (ret) goto out; - dev->dev_flags |= DF_CONFIGURED; - /* * XXX: there is not much point to have two different values here.. */ @@ -1654,6 +1652,8 @@ int target_configure_device(struct se_device *dev) list_add_tail(&dev->g_dev_node, &g_device_list); mutex_unlock(&g_device_mutex); + dev->dev_flags |= DF_CONFIGURED; + return 0; out_free_alua: diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 0fccdcfd1015..70cb37516d40 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -76,7 +76,7 @@ enum preempt_type { }; static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *, - struct t10_pr_registration *, int); + struct t10_pr_registration *, int, int); static sense_reason_t target_scsi2_reservation_check(struct se_cmd *cmd) @@ -528,6 +528,18 @@ static int core_scsi3_pr_seq_non_holder( return 0; } + } else if (we && registered_nexus) { + /* + * Reads are allowed for Write Exclusive locks + * from all registrants. + */ + if (cmd->data_direction == DMA_FROM_DEVICE) { + pr_debug("Allowing READ CDB: 0x%02x for %s" + " reservation\n", cdb[0], + core_scsi3_pr_dump_type(pr_reg_type)); + + return 0; + } } pr_debug("%s Conflict for %sregistered nexus %s CDB: 0x%2x" " for %s reservation\n", transport_dump_cmd_direction(cmd), @@ -1186,7 +1198,7 @@ static int core_scsi3_check_implicit_release( * service action with the SERVICE ACTION RESERVATION KEY * field set to zero (see 5.7.11.3). */ - __core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0); + __core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0, 1); ret = 1; /* * For 'All Registrants' reservation types, all existing @@ -1228,7 +1240,8 @@ static void __core_scsi3_free_registration( pr_reg->pr_reg_deve->def_pr_registered = 0; pr_reg->pr_reg_deve->pr_res_key = 0; - list_del(&pr_reg->pr_reg_list); + if (!list_empty(&pr_reg->pr_reg_list)) + list_del(&pr_reg->pr_reg_list); /* * Caller accessing *pr_reg using core_scsi3_locate_pr_reg(), * so call core_scsi3_put_pr_reg() to decrement our reference. @@ -1280,6 +1293,7 @@ void core_scsi3_free_pr_reg_from_nacl( { struct t10_reservation *pr_tmpl = &dev->t10_pr; struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_res_holder; + bool free_reg = false; /* * If the passed se_node_acl matches the reservation holder, * release the reservation. @@ -1287,13 +1301,18 @@ void core_scsi3_free_pr_reg_from_nacl( spin_lock(&dev->dev_reservation_lock); pr_res_holder = dev->dev_pr_res_holder; if ((pr_res_holder != NULL) && - (pr_res_holder->pr_reg_nacl == nacl)) - __core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0); + (pr_res_holder->pr_reg_nacl == nacl)) { + __core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0, 1); + free_reg = true; + } spin_unlock(&dev->dev_reservation_lock); /* * Release any registration associated with the struct se_node_acl. */ spin_lock(&pr_tmpl->registration_lock); + if (pr_res_holder && free_reg) + __core_scsi3_free_registration(dev, pr_res_holder, NULL, 0); + list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->registration_list, pr_reg_list) { @@ -1316,7 +1335,7 @@ void core_scsi3_free_all_registrations( if (pr_res_holder != NULL) { struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl; __core_scsi3_complete_pro_release(dev, pr_res_nacl, - pr_res_holder, 0); + pr_res_holder, 0, 0); } spin_unlock(&dev->dev_reservation_lock); @@ -2126,13 +2145,13 @@ core_scsi3_emulate_pro_register(struct se_cmd *cmd, u64 res_key, u64 sa_res_key, /* * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus. */ - pr_holder = core_scsi3_check_implicit_release( - cmd->se_dev, pr_reg); + type = pr_reg->pr_res_type; + pr_holder = core_scsi3_check_implicit_release(cmd->se_dev, + pr_reg); if (pr_holder < 0) { ret = TCM_RESERVATION_CONFLICT; goto out; } - type = pr_reg->pr_res_type; spin_lock(&pr_tmpl->registration_lock); /* @@ -2290,6 +2309,7 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) spin_lock(&dev->dev_reservation_lock); pr_res_holder = dev->dev_pr_res_holder; if (pr_res_holder) { + int pr_res_type = pr_res_holder->pr_res_type; /* * From spc4r17 Section 5.7.9: Reserving: * @@ -2300,7 +2320,9 @@ core_scsi3_pro_reserve(struct se_cmd *cmd, int type, int scope, u64 res_key) * the logical unit, then the command shall be completed with * RESERVATION CONFLICT status. */ - if (pr_res_holder != pr_reg) { + if ((pr_res_holder != pr_reg) && + (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) && + (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) { struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl; pr_err("SPC-3 PR: Attempted RESERVE from" " [%s]: %s while reservation already held by" @@ -2406,23 +2428,59 @@ static void __core_scsi3_complete_pro_release( struct se_device *dev, struct se_node_acl *se_nacl, struct t10_pr_registration *pr_reg, - int explicit) + int explicit, + int unreg) { struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo; char i_buf[PR_REG_ISID_ID_LEN]; + int pr_res_type = 0, pr_res_scope = 0; memset(i_buf, 0, PR_REG_ISID_ID_LEN); core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN); /* * Go ahead and release the current PR reservation holder. + * If an All Registrants reservation is currently active and + * a unregister operation is requested, replace the current + * dev_pr_res_holder with another active registration. */ - dev->dev_pr_res_holder = NULL; + if (dev->dev_pr_res_holder) { + pr_res_type = dev->dev_pr_res_holder->pr_res_type; + pr_res_scope = dev->dev_pr_res_holder->pr_res_scope; + dev->dev_pr_res_holder->pr_res_type = 0; + dev->dev_pr_res_holder->pr_res_scope = 0; + dev->dev_pr_res_holder->pr_res_holder = 0; + dev->dev_pr_res_holder = NULL; + } + if (!unreg) + goto out; - pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" - " reservation holder TYPE: %s ALL_TG_PT: %d\n", - tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit", - core_scsi3_pr_dump_type(pr_reg->pr_res_type), - (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); + spin_lock(&dev->t10_pr.registration_lock); + list_del_init(&pr_reg->pr_reg_list); + /* + * If the I_T nexus is a reservation holder, the persistent reservation + * is of an all registrants type, and the I_T nexus is the last remaining + * registered I_T nexus, then the device server shall also release the + * persistent reservation. + */ + if (!list_empty(&dev->t10_pr.registration_list) && + ((pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) || + (pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))) { + dev->dev_pr_res_holder = + list_entry(dev->t10_pr.registration_list.next, + struct t10_pr_registration, pr_reg_list); + dev->dev_pr_res_holder->pr_res_type = pr_res_type; + dev->dev_pr_res_holder->pr_res_scope = pr_res_scope; + dev->dev_pr_res_holder->pr_res_holder = 1; + } + spin_unlock(&dev->t10_pr.registration_lock); +out: + if (!dev->dev_pr_res_holder) { + pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared" + " reservation holder TYPE: %s ALL_TG_PT: %d\n", + tfo->get_fabric_name(), (explicit) ? "explicit" : + "implicit", core_scsi3_pr_dump_type(pr_res_type), + (pr_reg->pr_reg_all_tg_pt) ? 1 : 0); + } pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n", tfo->get_fabric_name(), se_nacl->initiatorname, i_buf); @@ -2553,7 +2611,7 @@ core_scsi3_emulate_pro_release(struct se_cmd *cmd, int type, int scope, * server shall not establish a unit attention condition. */ __core_scsi3_complete_pro_release(dev, se_sess->se_node_acl, - pr_reg, 1); + pr_reg, 1, 0); spin_unlock(&dev->dev_reservation_lock); @@ -2641,7 +2699,7 @@ core_scsi3_emulate_pro_clear(struct se_cmd *cmd, u64 res_key) if (pr_res_holder) { struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl; __core_scsi3_complete_pro_release(dev, pr_res_nacl, - pr_res_holder, 0); + pr_res_holder, 0, 0); } spin_unlock(&dev->dev_reservation_lock); /* @@ -2700,7 +2758,7 @@ static void __core_scsi3_complete_pro_preempt( */ if (dev->dev_pr_res_holder) __core_scsi3_complete_pro_release(dev, nacl, - dev->dev_pr_res_holder, 0); + dev->dev_pr_res_holder, 0, 0); dev->dev_pr_res_holder = pr_reg; pr_reg->pr_res_holder = 1; @@ -2944,8 +3002,8 @@ core_scsi3_pro_preempt(struct se_cmd *cmd, int type, int scope, u64 res_key, */ if (pr_reg_n != pr_res_holder) __core_scsi3_complete_pro_release(dev, - pr_res_holder->pr_reg_nacl, - dev->dev_pr_res_holder, 0); + pr_res_holder->pr_reg_nacl, + dev->dev_pr_res_holder, 0, 0); /* * b) Remove the registrations for all I_T nexuses identified * by the SERVICE ACTION RESERVATION KEY field, except the @@ -3415,7 +3473,7 @@ after_iport_check: * holder (i.e., the I_T nexus on which the */ __core_scsi3_complete_pro_release(dev, pr_res_nacl, - dev->dev_pr_res_holder, 0); + dev->dev_pr_res_holder, 0, 0); /* * g) Move the persistent reservation to the specified I_T nexus using * the same scope and type as the persistent reservation released in @@ -3855,7 +3913,8 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) unsigned char *buf; u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len; u32 off = 8; /* off into first Full Status descriptor */ - int format_code = 0; + int format_code = 0, pr_res_type = 0, pr_res_scope = 0; + bool all_reg = false; if (cmd->data_length < 8) { pr_err("PRIN SA READ_FULL_STATUS SCSI Data Length: %u" @@ -3872,6 +3931,19 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff); buf[3] = (dev->t10_pr.pr_generation & 0xff); + spin_lock(&dev->dev_reservation_lock); + if (dev->dev_pr_res_holder) { + struct t10_pr_registration *pr_holder = dev->dev_pr_res_holder; + + if (pr_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG || + pr_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG) { + all_reg = true; + pr_res_type = pr_holder->pr_res_type; + pr_res_scope = pr_holder->pr_res_scope; + } + } + spin_unlock(&dev->dev_reservation_lock); + spin_lock(&pr_tmpl->registration_lock); list_for_each_entry_safe(pr_reg, pr_reg_tmp, &pr_tmpl->registration_list, pr_reg_list) { @@ -3921,14 +3993,20 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * reservation holder for PR_HOLDER bit. * * Also, if this registration is the reservation - * holder, fill in SCOPE and TYPE in the next byte. + * holder or there is an All Registrants reservation + * active, fill in SCOPE and TYPE in the next byte. */ if (pr_reg->pr_res_holder) { buf[off++] |= 0x01; buf[off++] = (pr_reg->pr_res_scope & 0xf0) | (pr_reg->pr_res_type & 0x0f); - } else + } else if (all_reg) { + buf[off++] |= 0x01; + buf[off++] = (pr_res_scope & 0xf0) | + (pr_res_type & 0x0f); + } else { off += 2; + } off += 4; /* Skip over reserved area */ /* diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index 0f199f6a0738..29f28808fc03 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -1111,7 +1111,7 @@ static u32 pscsi_get_device_type(struct se_device *dev) struct pscsi_dev_virt *pdv = PSCSI_DEV(dev); struct scsi_device *sd = pdv->pdv_sd; - return sd->type; + return (sd) ? sd->type : TYPE_NO_LUN; } static sector_t pscsi_get_blocks(struct se_device *dev) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index e6463ef33cd2..9e54c0fe718d 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2327,6 +2327,10 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd, list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list); out: spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); + + if (ret && ack_kref) + target_put_sess_cmd(se_sess, se_cmd); + return ret; } EXPORT_SYMBOL(target_get_sess_cmd); diff --git a/drivers/target/tcm_fc/tfc_io.c b/drivers/target/tcm_fc/tfc_io.c index e415af32115a..c67d3795db4a 100644 --- a/drivers/target/tcm_fc/tfc_io.c +++ b/drivers/target/tcm_fc/tfc_io.c @@ -346,7 +346,7 @@ void ft_invl_hw_context(struct ft_cmd *cmd) ep = fc_seq_exch(seq); if (ep) { lport = ep->lp; - if (lport && (ep->xid <= lport->lro_xid)) + if (lport && (ep->xid <= lport->lro_xid)) { /* * "ddp_done" trigger invalidation of HW * specific DDP context @@ -361,6 +361,7 @@ void ft_invl_hw_context(struct ft_cmd *cmd) * identified using ep->xid) */ cmd->was_ddp_setup = 0; + } } } } diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 081fd7e6a9f0..f297ddd3365c 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -29,6 +29,7 @@ #include <linux/pm.h> #include <linux/thermal.h> #include <linux/debugfs.h> +#include <linux/work-simple.h> #include <asm/cpu_device_id.h> #include <asm/mce.h> @@ -352,7 +353,7 @@ static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) } } -static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +static void platform_thermal_notify_work(struct swork_event *event) { unsigned long flags; int cpu = smp_processor_id(); @@ -369,7 +370,7 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) pkg_work_scheduled[phy_id]) { disable_pkg_thres_interrupt(); spin_unlock_irqrestore(&pkg_work_lock, flags); - return -EINVAL; + return; } pkg_work_scheduled[phy_id] = 1; spin_unlock_irqrestore(&pkg_work_lock, flags); @@ -378,9 +379,48 @@ static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) schedule_delayed_work_on(cpu, &per_cpu(pkg_temp_thermal_threshold_work, cpu), msecs_to_jiffies(notify_delay_ms)); +} + +#ifdef CONFIG_PREEMPT_RT_FULL +static struct swork_event notify_work; + +static int thermal_notify_work_init(void) +{ + int err; + + err = swork_get(); + if (err) + return err; + + INIT_SWORK(¬ify_work, platform_thermal_notify_work); return 0; } +static void thermal_notify_work_cleanup(void) +{ + swork_put(); +} + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + swork_queue(¬ify_work); + return 0; +} + +#else /* !CONFIG_PREEMPT_RT_FULL */ + +static int thermal_notify_work_init(void) { return 0; } + +static int thermal_notify_work_cleanup(void) { } + +static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) +{ + platform_thermal_notify_work(NULL); + + return 0; +} +#endif /* CONFIG_PREEMPT_RT_FULL */ + static int find_siblings_cpu(int cpu) { int i; @@ -584,6 +624,9 @@ static int __init pkg_temp_thermal_init(void) if (!x86_match_cpu(pkg_temp_thermal_ids)) return -ENODEV; + if (!thermal_notify_work_init()) + return -ENODEV; + spin_lock_init(&pkg_work_lock); platform_thermal_package_notify = pkg_temp_thermal_platform_thermal_notify; @@ -608,7 +651,7 @@ err_ret: kfree(pkg_work_scheduled); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; - + thermal_notify_work_cleanup(); return -ENODEV; } @@ -633,6 +676,7 @@ static void __exit pkg_temp_thermal_exit(void) mutex_unlock(&phy_dev_list_mutex); platform_thermal_package_notify = NULL; platform_thermal_package_rate_control = NULL; + thermal_notify_work_cleanup(); for_each_online_cpu(i) cancel_delayed_work_sync( &per_cpu(pkg_temp_thermal_threshold_work, i)); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 850e232d086e..8ab46ad40f28 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -247,8 +247,6 @@ static void n_tty_write_wakeup(struct tty_struct *tty) static void n_tty_check_throttle(struct tty_struct *tty) { - if (tty->driver->type == TTY_DRIVER_TYPE_PTY) - return; /* * Check the remaining room for the input canonicalization * mode. We don't want to throttle the driver if we're in @@ -1512,23 +1510,6 @@ n_tty_receive_char_lnext(struct tty_struct *tty, unsigned char c, char flag) n_tty_receive_char_flagged(tty, c, flag); } -/** - * n_tty_receive_buf - data receive - * @tty: terminal device - * @cp: buffer - * @fp: flag buffer - * @count: characters - * - * Called by the terminal driver when a block of characters has - * been received. This function must be called from soft contexts - * not from interrupt context. The driver is responsible for making - * calls one at a time and in order (or using flush_to_ldisc) - * - * n_tty_receive_buf()/producer path: - * claims non-exclusive termios_rwsem - * publishes read_head and canon_head - */ - static void n_tty_receive_buf_real_raw(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) @@ -1684,24 +1665,85 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, } } +/** + * n_tty_receive_buf_common - process input + * @tty: device to receive input + * @cp: input chars + * @fp: flags for each char (if NULL, all chars are TTY_NORMAL) + * @count: number of input chars in @cp + * + * Called by the terminal driver when a block of characters has + * been received. This function must be called from soft contexts + * not from interrupt context. The driver is responsible for making + * calls one at a time and in order (or using flush_to_ldisc) + * + * Returns the # of input chars from @cp which were processed. + * + * In canonical mode, the maximum line length is 4096 chars (including + * the line termination char); lines longer than 4096 chars are + * truncated. After 4095 chars, input data is still processed but + * not stored. Overflow processing ensures the tty can always + * receive more input until at least one line can be read. + * + * In non-canonical mode, the read buffer will only accept 4095 chars; + * this provides the necessary space for a newline char if the input + * mode is switched to canonical. + * + * Note it is possible for the read buffer to _contain_ 4096 chars + * in non-canonical mode: the read buffer could already contain the + * maximum canon line of 4096 chars when the mode is switched to + * non-canonical. + * + * n_tty_receive_buf()/producer path: + * claims non-exclusive termios_rwsem + * publishes commit_head or canon_head + */ static int n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, char *fp, int count, int flow) { struct n_tty_data *ldata = tty->disc_data; - int room, n, rcvd = 0; + int room, n, rcvd = 0, overflow; down_read(&tty->termios_rwsem); while (1) { - room = receive_room(tty); + /* + * When PARMRK is set, each input char may take up to 3 chars + * in the read buf; reduce the buffer space avail by 3x + * + * If we are doing input canonicalization, and there are no + * pending newlines, let characters through without limit, so + * that erase characters will be handled. Other excess + * characters will be beeped. + * + * paired with store in *_copy_from_read_buf() -- guarantees + * the consumer has loaded the data in read_buf up to the new + * read_tail (so this producer will not overwrite unread data) + */ + size_t tail = ldata->read_tail; + + room = N_TTY_BUF_SIZE - (ldata->read_head - tail); + if (I_PARMRK(tty)) + room = (room + 2) / 3; + room--; + if (room <= 0) { + overflow = ldata->icanon && ldata->canon_head == tail; + if (overflow && room < 0) + ldata->read_head--; + room = overflow; + ldata->no_room = flow && !room; + } else + overflow = 0; + n = min(count, room); - if (!n) { - if (flow && !room) - ldata->no_room = 1; + if (!n) break; - } - __receive_buf(tty, cp, fp, n); + + /* ignore parity errors if handling overflow */ + if (!overflow || !fp || *fp != TTY_PARITY) + __receive_buf(tty, cp, fp, n); + cp += n; if (fp) fp += n; @@ -1710,7 +1752,17 @@ n_tty_receive_buf_common(struct tty_struct *tty, const unsigned char *cp, } tty->receive_room = room; - n_tty_check_throttle(tty); + + /* Unthrottle if handling overflow on pty */ + if (tty->driver->type == TTY_DRIVER_TYPE_PTY) { + if (overflow) { + tty_set_flow_change(tty, TTY_UNTHROTTLE_SAFE); + tty_unthrottle_safe(tty); + __tty_set_flow_change(tty, 0); + } + } else + n_tty_check_throttle(tty); + up_read(&tty->termios_rwsem); return rcvd; diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index feda34404ed0..5892eab03874 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -66,7 +66,7 @@ static void moan_device(const char *str, struct pci_dev *dev) "Please send the output of lspci -vv, this\n" "message (0x%04x,0x%04x,0x%04x,0x%04x), the\n" "manufacturer and name of serial board or\n" - "modem board to rmk+serial@arm.linux.org.uk.\n", + "modem board to <linux-serial@vger.kernel.org>.\n", pci_name(dev), str, dev->vendor, dev->device, dev->subsystem_vendor, dev->subsystem_device); } diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 175f123f4f09..501c465feb59 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -362,6 +362,9 @@ static void lpuart_setup_watermark(struct lpuart_port *sport) writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE, sport->port.membase + UARTPFIFO); + /* explicitly clear RDRF */ + readb(sport->port.membase + UARTSR1); + /* flush Tx and Rx FIFO */ writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH, sport->port.membase + UARTCFIFO); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 93fe089cd51a..b9e16abb0fab 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -383,6 +383,10 @@ static void xhci_clear_port_change_bit(struct xhci_hcd *xhci, u16 wValue, status = PORT_PLC; port_change_bit = "link state"; break; + case USB_PORT_FEAT_C_PORT_CONFIG_ERROR: + status = PORT_CEC; + port_change_bit = "config error"; + break; default: /* Should never happen */ return; @@ -584,6 +588,8 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd, status |= USB_PORT_STAT_C_LINK_STATE << 16; if ((raw_port_status & PORT_WRC)) status |= USB_PORT_STAT_C_BH_RESET << 16; + if ((raw_port_status & PORT_CEC)) + status |= USB_PORT_STAT_C_CONFIG_ERROR << 16; } if (hcd->speed != HCD_USB3) { @@ -999,6 +1005,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_C_OVER_CURRENT: case USB_PORT_FEAT_C_ENABLE: case USB_PORT_FEAT_C_PORT_LINK_STATE: + case USB_PORT_FEAT_C_PORT_CONFIG_ERROR: xhci_clear_port_change_bit(xhci, wValue, wIndex, port_array[wIndex], temp); break; @@ -1063,7 +1070,7 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) */ status = bus_state->resuming_ports; - mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC; + mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC; spin_lock_irqsave(&xhci->lock, flags); /* For each port, did anything change? If so, set that bit in buf. */ diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 73c43e5e231b..eb3399f4c1ed 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -108,6 +108,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) if (pdev->vendor == PCI_VENDOR_ID_INTEL) { xhci->quirks |= XHCI_LPM_SUPPORT; xhci->quirks |= XHCI_INTEL_HOST; + xhci->quirks |= XHCI_AVOID_BEI; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { @@ -123,7 +124,6 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) * PPT chipsets. */ xhci->quirks |= XHCI_SPURIOUS_REBOOT; - xhci->quirks |= XHCI_AVOID_BEI; } if (pdev->vendor == PCI_VENDOR_ID_INTEL && (pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI || diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 923500595357..2d858f81ab33 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -617,6 +617,7 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, /* * ELV devices: */ @@ -1901,8 +1902,12 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial) { struct usb_device *udev = serial->dev; - if ((udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) || - (udev->product && !strcmp(udev->product, "BeagleBone/XDS100V2"))) + if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) + return ftdi_jtag_probe(serial); + + if (udev->product && + (!strcmp(udev->product, "BeagleBone/XDS100V2") || + !strcmp(udev->product, "SNAP Connect E10"))) return ftdi_jtag_probe(serial); return 0; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 56b1b55c4751..4e4f46f3c89c 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -561,6 +561,12 @@ */ #define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +/* + * Synapse Wireless product ids (FTDI_VID) + * http://www.synapse-wireless.com + */ +#define FTDI_SYNAPSE_SS200_PID 0x9090 /* SS200 - SNAP Stick 200 */ + /********************************/ /** third-party VID/PID combos **/ diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index f4a9e3311297..c8860a8757ac 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -547,20 +547,26 @@ static unsigned int __startup_pirq(unsigned int irq) pirq_query_unmask(irq); rc = set_evtchn_to_irq(evtchn, irq); - if (rc != 0) { - pr_err("irq%d: Failed to set port to irq mapping (%d)\n", - irq, rc); - xen_evtchn_close(evtchn); - return 0; - } + if (rc) + goto err; + bind_evtchn_to_cpu(evtchn, 0); info->evtchn = evtchn; + rc = xen_evtchn_port_setup(info); + if (rc) + goto err; + out: unmask_evtchn(evtchn); eoi_pirq(irq_get_irq_data(irq)); return 0; + +err: + pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc); + xen_evtchn_close(evtchn); + return 0; } static unsigned int startup_pirq(struct irq_data *data) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 46ae0f9f02ad..75fe3d466515 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -16,7 +16,7 @@ #include "conf_space.h" #include "conf_space_quirks.h" -static bool permissive; +bool permissive; module_param(permissive, bool, 0644); /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, diff --git a/drivers/xen/xen-pciback/conf_space.h b/drivers/xen/xen-pciback/conf_space.h index e56c934ad137..2e1d73d1d5d0 100644 --- a/drivers/xen/xen-pciback/conf_space.h +++ b/drivers/xen/xen-pciback/conf_space.h @@ -64,6 +64,8 @@ struct config_field_entry { void *data; }; +extern bool permissive; + #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset) /* Add fields to a device - the add_fields macro expects to get a pointer to diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index c5ee82587e8c..2d7369391472 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -11,6 +11,10 @@ #include "pciback.h" #include "conf_space.h" +struct pci_cmd_info { + u16 val; +}; + struct pci_bar_info { u32 val; u32 len_val; @@ -20,22 +24,36 @@ struct pci_bar_info { #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) -static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) +/* Bits guests are allowed to control in permissive mode. */ +#define PCI_COMMAND_GUEST (PCI_COMMAND_MASTER|PCI_COMMAND_SPECIAL| \ + PCI_COMMAND_INVALIDATE|PCI_COMMAND_VGA_PALETTE| \ + PCI_COMMAND_WAIT|PCI_COMMAND_FAST_BACK) + +static void *command_init(struct pci_dev *dev, int offset) { - int i; - int ret; - - ret = xen_pcibk_read_config_word(dev, offset, value, data); - if (!pci_is_enabled(dev)) - return ret; - - for (i = 0; i < PCI_ROM_RESOURCE; i++) { - if (dev->resource[i].flags & IORESOURCE_IO) - *value |= PCI_COMMAND_IO; - if (dev->resource[i].flags & IORESOURCE_MEM) - *value |= PCI_COMMAND_MEMORY; + struct pci_cmd_info *cmd = kmalloc(sizeof(*cmd), GFP_KERNEL); + int err; + + if (!cmd) + return ERR_PTR(-ENOMEM); + + err = pci_read_config_word(dev, PCI_COMMAND, &cmd->val); + if (err) { + kfree(cmd); + return ERR_PTR(err); } + return cmd; +} + +static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) +{ + int ret = pci_read_config_word(dev, offset, value); + const struct pci_cmd_info *cmd = data; + + *value &= PCI_COMMAND_GUEST; + *value |= cmd->val & ~PCI_COMMAND_GUEST; + return ret; } @@ -43,6 +61,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) { struct xen_pcibk_dev_data *dev_data; int err; + u16 val; + struct pci_cmd_info *cmd = data; dev_data = pci_get_drvdata(dev); if (!pci_is_enabled(dev) && is_enable_cmd(value)) { @@ -83,6 +103,19 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data) } } + cmd->val = value; + + if (!permissive && (!dev_data || !dev_data->permissive)) + return 0; + + /* Only allow the guest to control certain bits. */ + err = pci_read_config_word(dev, offset, &val); + if (err || val == value) + return err; + + value &= PCI_COMMAND_GUEST; + value |= val & ~PCI_COMMAND_GUEST; + return pci_write_config_word(dev, offset, value); } @@ -282,6 +315,8 @@ static const struct config_field header_common[] = { { .offset = PCI_COMMAND, .size = 2, + .init = command_init, + .release = bar_release, .u.w.read = command_read, .u.w.write = command_write, }, diff --git a/fs/Kconfig b/fs/Kconfig index 7385e54be4b9..6b40fd8d719e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -67,6 +67,7 @@ source "fs/quota/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" +source "fs/overlayfs/Kconfig" menu "Caches" diff --git a/fs/Makefile b/fs/Makefile index 47ac07bb4acc..b5954404e02d 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -104,6 +104,7 @@ obj-$(CONFIG_QNX6FS_FS) += qnx6/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ +obj-$(CONFIG_OVERLAY_FS) += overlayfs/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_OMFS_FS) += omfs/ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 529300327f45..5479af6de6ce 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -26,7 +26,7 @@ static int afs_readdir(struct file *file, struct dir_context *ctx); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_release(struct dentry *dentry); -static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl); @@ -391,10 +391,11 @@ static int afs_readdir(struct file *file, struct dir_context *ctx) * - if afs_dir_iterate_block() spots this function, it'll pass the FID * uniquifier through dtype */ -static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, - loff_t fpos, u64 ino, unsigned dtype) +static int afs_lookup_filldir(struct dir_context *ctx, const char *name, + int nlen, loff_t fpos, u64 ino, unsigned dtype) { - struct afs_lookup_cookie *cookie = _cookie; + struct afs_lookup_cookie *cookie = + container_of(ctx, struct afs_lookup_cookie, ctx); _enter("{%s,%u},%s,%u,,%llu,%u", cookie->name.name, cookie->name.len, name, nlen, @@ -40,6 +40,7 @@ #include <linux/ramfs.h> #include <linux/percpu-refcount.h> #include <linux/mount.h> +#include <linux/work-simple.h> #include <asm/kmap_types.h> #include <asm/uaccess.h> @@ -110,7 +111,7 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct work_struct free_work; + struct swork_event free_work; /* * signals when all in-flight requests are done @@ -227,6 +228,7 @@ static int __init aio_setup(void) .mount = aio_mount, .kill_sb = kill_anon_super, }; + BUG_ON(swork_get()); aio_mnt = kern_mount(&aio_fs); if (IS_ERR(aio_mnt)) panic("Failed to create aio fs mount."); @@ -506,9 +508,9 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) return cancel(kiocb); } -static void free_ioctx(struct work_struct *work) +static void free_ioctx(struct swork_event *sev) { - struct kioctx *ctx = container_of(work, struct kioctx, free_work); + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); pr_debug("freeing %p\n", ctx); @@ -525,8 +527,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) if (ctx->requests_done) complete(ctx->requests_done); - INIT_WORK(&ctx->free_work, free_ioctx); - schedule_work(&ctx->free_work); + INIT_SWORK(&ctx->free_work, free_ioctx); + swork_queue(&ctx->free_work); } /* @@ -534,9 +536,9 @@ static void free_ioctx_reqs(struct percpu_ref *ref) * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted - * now it's safe to cancel any that need to be. */ -static void free_ioctx_users(struct percpu_ref *ref) +static void free_ioctx_users_work(struct swork_event *sev) { - struct kioctx *ctx = container_of(ref, struct kioctx, users); + struct kioctx *ctx = container_of(sev, struct kioctx, free_work); struct kiocb *req; spin_lock_irq(&ctx->ctx_lock); @@ -555,6 +557,14 @@ static void free_ioctx_users(struct percpu_ref *ref) percpu_ref_put(&ctx->reqs); } +static void free_ioctx_users(struct percpu_ref *ref) +{ + struct kioctx *ctx = container_of(ref, struct kioctx, users); + + INIT_SWORK(&ctx->free_work, free_ioctx_users_work); + swork_queue(&ctx->free_work); +} + static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) { unsigned i, new_nr; @@ -719,6 +729,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err_cleanup: aio_nr_sub(ctx->max_reqs); err_ctx: + atomic_set(&ctx->dead, 1); + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); aio_free_ring(ctx); err: mutex_unlock(&ctx->ring_lock); diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 93de3ba994e7..47bc9e613245 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -80,7 +80,7 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, { int i; -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if (defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_PREEMPT_RT_BASE)) /* lockdep really cares that we take all of these spinlocks * in the right order. If any of the locks in the path are not * currently blocking, it is going to complain. So, make really @@ -107,7 +107,7 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, } } -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if (defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_PREEMPT_RT_BASE)) if (held) btrfs_clear_lock_blocking_rw(held, held_rw); #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d2f1c011d73a..73035cf6681a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6940,14 +6940,6 @@ again: goto again; } - if (btrfs_test_opt(root, ENOSPC_DEBUG)) { - static DEFINE_RATELIMIT_STATE(_rs, - DEFAULT_RATELIMIT_INTERVAL * 10, - /*DEFAULT_RATELIMIT_BURST*/ 1); - if (__ratelimit(&_rs)) - WARN(1, KERN_DEBUG - "BTRFS: block rsv returned %d\n", ret); - } try_reserve: ret = reserve_metadata_bytes(root, block_rsv, blocksize, BTRFS_RESERVE_NO_FLUSH); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0b72006aecbe..96b95e6f9f91 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -682,23 +682,6 @@ out: return ret; } -/* copy of check_sticky in fs/namei.c() -* It's inline, so penalty for filesystems that don't use sticky bit is -* minimal. -*/ -static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) -{ - kuid_t fsuid = current_fsuid(); - - if (!(dir->i_mode & S_ISVTX)) - return 0; - if (uid_eq(inode->i_uid, fsuid)) - return 0; - if (uid_eq(dir->i_uid, fsuid)) - return 0; - return !capable(CAP_FOWNER); -} - /* copy of may_delete in fs/namei.c() * Check whether we can remove a link victim from directory dir, check * whether the type of victim is right. @@ -734,8 +717,7 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) return error; if (IS_APPEND(dir)) return -EPERM; - if (btrfs_check_sticky(dir, victim->d_inode)|| - IS_APPEND(victim->d_inode)|| + if (check_sticky(dir, victim->d_inode) || IS_APPEND(victim->d_inode) || IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) return -EPERM; if (isdir) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a7f32bfdd5e7..ec8b6542c8bd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1235,21 +1235,13 @@ out: } static int insert_orphan_item(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 offset) + struct btrfs_root *root, u64 ino) { int ret; - struct btrfs_path *path; - - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - ret = btrfs_find_item(root, path, BTRFS_ORPHAN_OBJECTID, - offset, BTRFS_ORPHAN_ITEM_KEY, NULL); - if (ret > 0) - ret = btrfs_insert_orphan_item(trans, root, offset); - - btrfs_free_path(path); + ret = btrfs_insert_orphan_item(trans, root, ino); + if (ret == -EEXIST) + ret = 0; return ret; } diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index 622f4696e484..5b99bafc31d1 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -124,7 +124,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) /* check parameters */ ret = -EOPNOTSUPP; if (!root->d_inode || - !root->d_inode->i_op || !root->d_inode->i_op->lookup || !root->d_inode->i_op->mkdir || !root->d_inode->i_op->setxattr || diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ca65f39dc8dc..c0a681705104 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -391,12 +391,12 @@ try_again: path.dentry = dir; path_to_graveyard.mnt = cache->mnt; path_to_graveyard.dentry = cache->graveyard; - ret = security_path_rename(&path, rep, &path_to_graveyard, grave); + ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); if (ret < 0) { cachefiles_io_error(cache, "Rename security error %d", ret); } else { ret = vfs_rename(dir->d_inode, rep, - cache->graveyard->d_inode, grave, NULL); + cache->graveyard->d_inode, grave, NULL, 0); if (ret != 0 && ret != -ENOMEM) cachefiles_io_error(cache, "Rename failed with error %d", ret); @@ -779,8 +779,7 @@ struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache, } ret = -EPERM; - if (!subdir->d_inode->i_op || - !subdir->d_inode->i_op->setxattr || + if (!subdir->d_inode->i_op->setxattr || !subdir->d_inode->i_op->getxattr || !subdir->d_inode->i_op->lookup || !subdir->d_inode->i_op->mkdir || diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0218a9b23b38..40ddb6e93912 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1821,6 +1821,7 @@ refind_writable: cifsFileInfo_put(inv_file); spin_lock(&cifs_file_list_lock); ++refind; + inv_file = NULL; goto refind_writable; } } diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 34a17d425be6..30f3eb5bc022 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -630,7 +630,8 @@ smb2_clone_range(const unsigned int xid, /* No need to change MaxChunks since already set to 1 */ chunk_sizes_updated = true; - } + } else + goto cchunk_out; } cchunk_out: diff --git a/fs/compat.c b/fs/compat.c index 6af20de2c1a3..14da9b327ffa 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -837,10 +837,12 @@ struct compat_readdir_callback { int result; }; -static int compat_fillonedir(void *__buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) +static int compat_fillonedir(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { - struct compat_readdir_callback *buf = __buf; + struct compat_readdir_callback *buf = + container_of(ctx, struct compat_readdir_callback, ctx); struct compat_old_linux_dirent __user *dirent; compat_ulong_t d_ino; @@ -905,11 +907,12 @@ struct compat_getdents_callback { int error; }; -static int compat_filldir(void *__buf, const char *name, int namlen, +static int compat_filldir(struct dir_context *ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { struct compat_linux_dirent __user * dirent; - struct compat_getdents_callback *buf = __buf; + struct compat_getdents_callback *buf = + container_of(ctx, struct compat_getdents_callback, ctx); compat_ulong_t d_ino; int reclen = ALIGN(offsetof(struct compat_linux_dirent, d_name) + namlen + 2, sizeof(compat_long_t)); @@ -991,11 +994,13 @@ struct compat_getdents_callback64 { int error; }; -static int compat_filldir64(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int compat_filldir64(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { struct linux_dirent64 __user *dirent; - struct compat_getdents_callback64 *buf = __buf; + struct compat_getdents_callback64 *buf = + container_of(ctx, struct compat_getdents_callback64, ctx); int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); u64 off; diff --git a/fs/dcache.c b/fs/dcache.c index 22fdd1674171..cdd820e976f2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2482,12 +2482,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) dentry->d_name.name = dentry->d_iname; } else { /* - * Both are internal. Just copy target to dentry + * Both are internal. */ - memcpy(dentry->d_iname, target->d_name.name, - target->d_name.len + 1); - dentry->d_name.len = target->d_name.len; - return; + unsigned int i; + BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); + for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { + swap(((long *) &dentry->d_iname)[i], + ((long *) &target->d_iname)[i]); + } } } swap(dentry->d_name.len, target->d_name.len); @@ -2544,13 +2546,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, * __d_move - move a dentry * @dentry: entry to move * @target: new dentry + * @exchange: exchange the two dentries * * Update the dcache to reflect the move of a file name. Negative * dcache entries should not be moved in this way. Caller must hold * rename_lock, the i_mutex of the source and target directories, * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). */ -static void __d_move(struct dentry * dentry, struct dentry * target) +static void __d_move(struct dentry *dentry, struct dentry *target, + bool exchange) { if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2572,8 +2576,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) __d_drop(dentry); __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); - /* Unhash the target: dput() will then get rid of it */ + /* + * Unhash the target (d_delete() is not usable here). If exchanging + * the two dentries, then rehash onto the other's hash queue. + */ __d_drop(target); + if (exchange) { + __d_rehash(target, + d_hash(dentry->d_parent, dentry->d_name.hash)); + } list_del(&dentry->d_u.d_child); list_del(&target->d_u.d_child); @@ -2600,6 +2611,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) write_seqcount_end(&dentry->d_seq); dentry_unlock_parents_for_move(dentry, target); + if (exchange) + fsnotify_d_move(target); spin_unlock(&target->d_lock); fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); @@ -2617,11 +2630,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) void d_move(struct dentry *dentry, struct dentry *target) { write_seqlock(&rename_lock); - __d_move(dentry, target); + __d_move(dentry, target, false); write_sequnlock(&rename_lock); } EXPORT_SYMBOL(d_move); +/* + * d_exchange - exchange two dentries + * @dentry1: first dentry + * @dentry2: second dentry + */ +void d_exchange(struct dentry *dentry1, struct dentry *dentry2) +{ + write_seqlock(&rename_lock); + + WARN_ON(!dentry1->d_inode); + WARN_ON(!dentry2->d_inode); + WARN_ON(IS_ROOT(dentry1)); + WARN_ON(IS_ROOT(dentry2)); + + __d_move(dentry1, dentry2, true); + + write_sequnlock(&rename_lock); +} + /** * d_ancestor - search for an ancestor * @p1: ancestor dentry @@ -2669,7 +2701,7 @@ static struct dentry *__d_unalias(struct inode *inode, m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: if (likely(!d_mountpoint(alias))) { - __d_move(alias, dentry); + __d_move(alias, dentry, false); ret = alias; } out_err: diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 03df50211c48..121a94876d40 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -76,11 +76,11 @@ struct ecryptfs_getdents_callback { /* Inspired by generic filldir in fs/readdir.c */ static int -ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, - loff_t offset, u64 ino, unsigned int d_type) +ecryptfs_filldir(struct dir_context *ctx, const char *lower_name, + int lower_namelen, loff_t offset, u64 ino, unsigned int d_type) { struct ecryptfs_getdents_callback *buf = - (struct ecryptfs_getdents_callback *)dirent; + container_of(ctx, struct ecryptfs_getdents_callback, ctx); size_t name_size; char *name; int rc; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a85ceb7c91bc..57ee4c53b4f8 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -641,7 +641,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, } rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, lower_new_dir_dentry->d_inode, lower_new_dentry, - NULL); + NULL, 0); if (rc) goto out_lock; if (target_inode) diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 34eb8433d93f..d9eb84bda559 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -576,6 +576,13 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags s->s_maxbytes = path.dentry->d_sb->s_maxbytes; s->s_blocksize = path.dentry->d_sb->s_blocksize; s->s_magic = ECRYPTFS_SUPER_MAGIC; + s->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1; + + rc = -EINVAL; + if (s->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("eCryptfs: maximum fs stacking depth exceeded\n"); + goto out_free; + } inode = ecryptfs_get_inode(path.dentry->d_inode, s); rc = PTR_ERR(inode); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 48a359dd286e..59d339c417c9 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -241,10 +241,11 @@ struct getdents_callback { * A rather strange filldir function to capture * the name matching the specified inode number. */ -static int filldir_one(void * __buf, const char * name, int len, +static int filldir_one(struct dir_context *ctx, const char *name, int len, loff_t pos, u64 ino, unsigned int d_type) { - struct getdents_callback *buf = __buf; + struct getdents_callback *buf = + container_of(ctx, struct getdents_callback, ctx); int result = 0; buf->sequence++; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 2dcbfb6245d8..98de4b391e79 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3191,6 +3191,16 @@ end_rename: return retval; } +static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); +} + /* * directories can handle most operations... */ @@ -3204,7 +3214,7 @@ const struct inode_operations ext4_dir_inode_operations = { .rmdir = ext4_rmdir, .mknod = ext4_mknod, .tmpfile = ext4_tmpfile, - .rename = ext4_rename, + .rename2 = ext4_rename2, .setattr = ext4_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 3963ede84eb0..c5d6bb939d19 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -702,10 +702,11 @@ static int fat_readdir(struct file *file, struct dir_context *ctx) } #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ -static int func(void *__buf, const char *name, int name_len, \ +static int func(struct dir_context *ctx, const char *name, int name_len, \ loff_t offset, u64 ino, unsigned int d_type) \ { \ - struct fat_ioctl_filldir_callback *buf = __buf; \ + struct fat_ioctl_filldir_callback *buf = \ + container_of(ctx, struct fat_ioctl_filldir_callback, ctx); \ struct dirent_type __user *d1 = buf->dirent; \ struct dirent_type __user *d2 = d1 + 1; \ \ diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 6eb13c621a14..499155ca3e84 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -819,8 +819,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) newpage = buf->page; - if (WARN_ON(!PageUptodate(newpage))) - return -EIO; + if (!PageUptodate(newpage)) + SetPageUptodate(newpage); ClearPageMappedToDisk(newpage); @@ -1726,6 +1726,9 @@ copy_finish: static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, unsigned int size, struct fuse_copy_state *cs) { + /* Don't try to move pages (yet) */ + cs->move_pages = 0; + switch (code) { case FUSE_NOTIFY_POLL: return fuse_notify_poll(fc, size, cs); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 342f0239fcbf..33dec83eccfb 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -680,6 +680,14 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, return create_new_entry(fc, req, dir, entry, S_IFLNK); } +static inline void fuse_update_ctime(struct inode *inode) +{ + if (!IS_NOCMTIME(inode)) { + inode->i_ctime = current_fs_time(inode->i_sb); + mark_inode_dirty_sync(inode); + } +} + static int fuse_unlink(struct inode *dir, struct dentry *entry) { int err; @@ -714,6 +722,7 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); + fuse_update_ctime(inode); } else if (err == -EINTR) fuse_invalidate_entry(entry); return err; @@ -744,23 +753,26 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) return err; } -static int fuse_rename(struct inode *olddir, struct dentry *oldent, - struct inode *newdir, struct dentry *newent) +static int fuse_rename_common(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags, int opcode, size_t argsize) { int err; - struct fuse_rename_in inarg; + struct fuse_rename2_in inarg; struct fuse_conn *fc = get_fuse_conn(olddir); - struct fuse_req *req = fuse_get_req_nopages(fc); + struct fuse_req *req; + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) return PTR_ERR(req); - memset(&inarg, 0, sizeof(inarg)); + memset(&inarg, 0, argsize); inarg.newdir = get_node_id(newdir); - req->in.h.opcode = FUSE_RENAME; + inarg.flags = flags; + req->in.h.opcode = opcode; req->in.h.nodeid = get_node_id(olddir); req->in.numargs = 3; - req->in.args[0].size = sizeof(inarg); + req->in.args[0].size = argsize; req->in.args[0].value = &inarg; req->in.args[1].size = oldent->d_name.len + 1; req->in.args[1].value = oldent->d_name.name; @@ -772,15 +784,22 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, if (!err) { /* ctime changes */ fuse_invalidate_attr(oldent->d_inode); + fuse_update_ctime(oldent->d_inode); + + if (flags & RENAME_EXCHANGE) { + fuse_invalidate_attr(newent->d_inode); + fuse_update_ctime(newent->d_inode); + } fuse_invalidate_attr(olddir); if (olddir != newdir) fuse_invalidate_attr(newdir); /* newent will end up negative */ - if (newent->d_inode) { + if (!(flags & RENAME_EXCHANGE) && newent->d_inode) { fuse_invalidate_attr(newent->d_inode); fuse_invalidate_entry_cache(newent); + fuse_update_ctime(newent->d_inode); } } else if (err == -EINTR) { /* If request was interrupted, DEITY only knows if the @@ -796,6 +815,36 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, return err; } +static int fuse_rename2(struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + struct fuse_conn *fc = get_fuse_conn(olddir); + int err; + + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) + return -EINVAL; + + if (flags) { + if (fc->no_rename2 || fc->minor < 23) + return -EINVAL; + + err = fuse_rename_common(olddir, oldent, newdir, newent, flags, + FUSE_RENAME2, + sizeof(struct fuse_rename2_in)); + if (err == -ENOSYS) { + fc->no_rename2 = 1; + err = -EINVAL; + } + } else { + err = fuse_rename_common(olddir, oldent, newdir, newent, 0, + FUSE_RENAME, + sizeof(struct fuse_rename_in)); + } + + return err; +} + static int fuse_link(struct dentry *entry, struct inode *newdir, struct dentry *newent) { @@ -830,6 +879,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, inc_nlink(inode); spin_unlock(&fc->lock); fuse_invalidate_attr(inode); + fuse_update_ctime(inode); } else if (err == -EINTR) { fuse_invalidate_attr(inode); } @@ -840,6 +890,16 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, struct kstat *stat) { unsigned int blkbits; + struct fuse_conn *fc = get_fuse_conn(inode); + + /* see the comment in fuse_change_attributes() */ + if (fc->writeback_cache && S_ISREG(inode->i_mode)) { + attr->size = i_size_read(inode); + attr->mtime = inode->i_mtime.tv_sec; + attr->mtimensec = inode->i_mtime.tv_nsec; + attr->ctime = inode->i_ctime.tv_sec; + attr->ctimensec = inode->i_ctime.tv_nsec; + } stat->dev = inode->i_sb->s_dev; stat->ino = attr->ino; @@ -1478,12 +1538,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); } -static bool update_mtime(unsigned ivalid) +static bool update_mtime(unsigned ivalid, bool trust_local_mtime) { /* Always update if mtime is explicitly set */ if (ivalid & ATTR_MTIME_SET) return true; + /* Or if kernel i_mtime is the official one */ + if (trust_local_mtime) + return true; + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) return false; @@ -1492,7 +1556,8 @@ static bool update_mtime(unsigned ivalid) return true; } -static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) +static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, + bool trust_local_mtime) { unsigned ivalid = iattr->ia_valid; @@ -1511,11 +1576,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) if (!(ivalid & ATTR_ATIME_SET)) arg->valid |= FATTR_ATIME_NOW; } - if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { + if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { arg->valid |= FATTR_MTIME; arg->mtime = iattr->ia_mtime.tv_sec; arg->mtimensec = iattr->ia_mtime.tv_nsec; - if (!(ivalid & ATTR_MTIME_SET)) + if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) arg->valid |= FATTR_MTIME_NOW; } } @@ -1564,6 +1629,63 @@ void fuse_release_nowrite(struct inode *inode) spin_unlock(&fc->lock); } +static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, + struct inode *inode, + struct fuse_setattr_in *inarg_p, + struct fuse_attr_out *outarg_p) +{ + req->in.h.opcode = FUSE_SETATTR; + req->in.h.nodeid = get_node_id(inode); + req->in.numargs = 1; + req->in.args[0].size = sizeof(*inarg_p); + req->in.args[0].value = inarg_p; + req->out.numargs = 1; + if (fc->minor < 9) + req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; + else + req->out.args[0].size = sizeof(*outarg_p); + req->out.args[0].value = outarg_p; +} + +/* + * Flush inode->i_mtime to the server + */ +int fuse_flush_mtime(struct file *file, bool nofail) +{ + struct inode *inode = file->f_mapping->host; + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_req *req = NULL; + struct fuse_setattr_in inarg; + struct fuse_attr_out outarg; + int err; + + if (nofail) { + req = fuse_get_req_nofail_nopages(fc, file); + } else { + req = fuse_get_req_nopages(fc); + if (IS_ERR(req)) + return PTR_ERR(req); + } + + memset(&inarg, 0, sizeof(inarg)); + memset(&outarg, 0, sizeof(outarg)); + + inarg.valid |= FATTR_MTIME; + inarg.mtime = inode->i_mtime.tv_sec; + inarg.mtimensec = inode->i_mtime.tv_nsec; + + fuse_setattr_fill(fc, req, inode, &inarg, &outarg); + fuse_request_send(fc, req); + err = req->out.h.error; + fuse_put_request(fc, req); + + if (!err) + clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); + + return err; +} + /* * Set attributes, and at the same time refresh them. * @@ -1581,8 +1703,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct fuse_setattr_in inarg; struct fuse_attr_out outarg; bool is_truncate = false; + bool is_wb = fc->writeback_cache; loff_t oldsize; int err; + bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) attr->ia_valid |= ATTR_FORCE; @@ -1611,7 +1735,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); - iattr_to_fattr(attr, &inarg); + iattr_to_fattr(attr, &inarg, trust_local_mtime); if (file) { struct fuse_file *ff = file->private_data; inarg.valid |= FATTR_FH; @@ -1622,17 +1746,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, inarg.valid |= FATTR_LOCKOWNER; inarg.lock_owner = fuse_lock_owner_id(fc, current->files); } - req->in.h.opcode = FUSE_SETATTR; - req->in.h.nodeid = get_node_id(inode); - req->in.numargs = 1; - req->in.args[0].size = sizeof(inarg); - req->in.args[0].value = &inarg; - req->out.numargs = 1; - if (fc->minor < 9) - req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; - else - req->out.args[0].size = sizeof(outarg); - req->out.args[0].value = &outarg; + fuse_setattr_fill(fc, req, inode, &inarg, &outarg); fuse_request_send(fc, req); err = req->out.h.error; fuse_put_request(fc, req); @@ -1649,10 +1763,18 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, } spin_lock(&fc->lock); + /* the kernel maintains i_mtime locally */ + if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { + inode->i_mtime = attr->ia_mtime; + clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); + } + fuse_change_attributes_common(inode, &outarg.attr, attr_timeout(&outarg)); oldsize = inode->i_size; - i_size_write(inode, outarg.attr.size); + /* see the comment in fuse_change_attributes() */ + if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) + i_size_write(inode, outarg.attr.size); if (is_truncate) { /* NOTE: this may release/reacquire fc->lock */ @@ -1664,7 +1786,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, * Only call invalidate_inode_pages2() after removing * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. */ - if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { + if ((is_truncate || !is_wb) && + S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { truncate_pagecache(inode, outarg.attr.size); invalidate_inode_pages2(inode->i_mapping); } @@ -1740,8 +1863,10 @@ static int fuse_setxattr(struct dentry *entry, const char *name, fc->no_setxattr = 1; err = -EOPNOTSUPP; } - if (!err) + if (!err) { fuse_invalidate_attr(inode); + fuse_update_ctime(inode); + } return err; } @@ -1871,18 +1996,31 @@ static int fuse_removexattr(struct dentry *entry, const char *name) fc->no_removexattr = 1; err = -EOPNOTSUPP; } - if (!err) + if (!err) { fuse_invalidate_attr(inode); + fuse_update_ctime(inode); + } return err; } +static int fuse_update_time(struct inode *inode, struct timespec *now, + int flags) +{ + if (flags & S_MTIME) { + inode->i_mtime = *now; + set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state); + BUG_ON(!S_ISREG(inode->i_mode)); + } + return 0; +} + static const struct inode_operations fuse_dir_inode_operations = { .lookup = fuse_lookup, .mkdir = fuse_mkdir, .symlink = fuse_symlink, .unlink = fuse_unlink, .rmdir = fuse_rmdir, - .rename = fuse_rename, + .rename2 = fuse_rename2, .link = fuse_link, .setattr = fuse_setattr, .create = fuse_create, @@ -1915,6 +2053,7 @@ static const struct inode_operations fuse_common_inode_operations = { .getxattr = fuse_getxattr, .listxattr = fuse_listxattr, .removexattr = fuse_removexattr, + .update_time = fuse_update_time, }; static const struct inode_operations fuse_symlink_inode_operations = { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index d8a60270581c..5916dc51599b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, } EXPORT_SYMBOL_GPL(fuse_do_open); +static void fuse_link_write_file(struct file *file) +{ + struct inode *inode = file_inode(file); + struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); + struct fuse_file *ff = file->private_data; + /* + * file may be written through mmap, so chain it onto the + * inodes's write_file list + */ + spin_lock(&fc->lock); + if (list_empty(&ff->write_entry)) + list_add(&ff->write_entry, &fi->write_files); + spin_unlock(&fc->lock); +} + void fuse_finish_open(struct inode *inode, struct file *file) { struct fuse_file *ff = file->private_data; @@ -292,6 +308,9 @@ static int fuse_open(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file) { + if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) + fuse_flush_mtime(file, true); + fuse_release_common(file, FUSE_RELEASE); /* return value is ignored by VFS */ @@ -459,6 +478,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, fuse_sync_writes(inode); + if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { + int err = fuse_flush_mtime(file, false); + if (err) + goto out; + } + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -655,6 +680,32 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, spin_unlock(&fc->lock); } +static void fuse_short_read(struct fuse_req *req, struct inode *inode, + u64 attr_ver) +{ + size_t num_read = req->out.args[0].size; + struct fuse_conn *fc = get_fuse_conn(inode); + + if (fc->writeback_cache) { + /* + * A hole in a file. Some data after the hole are in page cache, + * but have not reached the client fs yet. So, the hole is not + * present there. + */ + int i; + int start_idx = num_read >> PAGE_CACHE_SHIFT; + size_t off = num_read & (PAGE_CACHE_SIZE - 1); + + for (i = start_idx; i < req->num_pages; i++) { + zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); + off = 0; + } + } else { + loff_t pos = page_offset(req->pages[0]) + num_read; + fuse_read_update_size(inode, pos, attr_ver); + } +} + static int fuse_readpage(struct file *file, struct page *page) { struct fuse_io_priv io = { .async = 0, .file = file }; @@ -692,18 +743,18 @@ static int fuse_readpage(struct file *file, struct page *page) req->page_descs[0].length = count; num_read = fuse_send_read(req, &io, pos, count, NULL); err = req->out.h.error; - fuse_put_request(fc, req); if (!err) { /* * Short read means EOF. If file size is larger, truncate it */ if (num_read < count) - fuse_read_update_size(inode, pos + num_read, attr_ver); + fuse_short_read(req, inode, attr_ver); SetPageUptodate(page); } + fuse_put_request(fc, req); fuse_invalidate_atime(inode); out: unlock_page(page); @@ -726,13 +777,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) /* * Short read means EOF. If file size is larger, truncate it */ - if (!req->out.h.error && num_read < count) { - loff_t pos; + if (!req->out.h.error && num_read < count) + fuse_short_read(req, inode, req->misc.read.attr_ver); - pos = page_offset(req->pages[0]) + num_read; - fuse_read_update_size(inode, pos, - req->misc.read.attr_ver); - } fuse_invalidate_atime(inode); } @@ -922,16 +969,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, return req->misc.write.out.size; } -void fuse_write_update_size(struct inode *inode, loff_t pos) +bool fuse_write_update_size(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + bool ret = false; spin_lock(&fc->lock); fi->attr_version = ++fc->attr_version; - if (pos > inode->i_size) + if (pos > inode->i_size) { i_size_write(inode, pos); + ret = true; + } spin_unlock(&fc->lock); + + return ret; } static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, @@ -1942,20 +1994,9 @@ static const struct vm_operations_struct fuse_file_vm_ops = { static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) { - if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { - struct inode *inode = file_inode(file); - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_inode *fi = get_fuse_inode(inode); - struct fuse_file *ff = file->private_data; - /* - * file may be written through mmap, so chain it onto the - * inodes's write_file list - */ - spin_lock(&fc->lock); - if (list_empty(&ff->write_entry)) - list_add(&ff->write_entry, &fi->write_files); - spin_unlock(&fc->lock); - } + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + fuse_link_write_file(file); + file_accessed(file); vma->vm_ops = &fuse_file_vm_ops; return 0; @@ -2846,8 +2887,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, goto out; /* we could have extended the file */ - if (!(mode & FALLOC_FL_KEEP_SIZE)) - fuse_write_update_size(inode, offset + length); + if (!(mode & FALLOC_FL_KEEP_SIZE)) { + bool changed = fuse_write_update_size(inode, offset + length); + + if (changed && fc->writeback_cache) { + struct fuse_inode *fi = get_fuse_inode(inode); + + inode->i_mtime = current_fs_time(inode->i_sb); + set_bit(FUSE_I_MTIME_DIRTY, &fi->state); + } + } if (mode & FALLOC_FL_PUNCH_HOLE) truncate_pagecache_range(inode, offset, offset + length - 1); diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2da5db2c8bdb..7cc58c976780 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -119,6 +119,8 @@ enum { FUSE_I_INIT_RDPLUS, /** An operation changing file size is in progress */ FUSE_I_SIZE_UNSTABLE, + /** i_mtime has been updated locally; a flush to userspace needed */ + FUSE_I_MTIME_DIRTY, }; struct fuse_conn; @@ -480,6 +482,9 @@ struct fuse_conn { /** Set if bdi is valid */ unsigned bdi_initialized:1; + /** write-back cache policy (default is write-through) */ + unsigned writeback_cache:1; + /* * The following bitfields are only for optimization purposes * and hence races in setting them will not cause malfunction @@ -539,6 +544,9 @@ struct fuse_conn { /** Is fallocate not implemented by fs? */ unsigned no_fallocate:1; + /** Is rename with flags implemented by fs? */ + unsigned no_rename2:1; + /** Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; @@ -873,7 +881,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, unsigned fuse_file_poll(struct file *file, poll_table *wait); int fuse_dev_release(struct inode *inode, struct file *file); -void fuse_write_update_size(struct inode *inode, loff_t pos); +bool fuse_write_update_size(struct inode *inode, loff_t pos); + +int fuse_flush_mtime(struct file *file, bool nofail); int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file); diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 73f6bcb44ea8..8fe0b485908c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -170,10 +170,13 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_blocks = attr->blocks; inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; - inode->i_mtime.tv_sec = attr->mtime; - inode->i_mtime.tv_nsec = attr->mtimensec; - inode->i_ctime.tv_sec = attr->ctime; - inode->i_ctime.tv_nsec = attr->ctimensec; + /* mtime from server may be stale due to local buffered write */ + if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { + inode->i_mtime.tv_sec = attr->mtime; + inode->i_mtime.tv_nsec = attr->mtimensec; + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = attr->ctimensec; + } if (attr->blksize != 0) inode->i_blkbits = ilog2(attr->blksize); @@ -197,6 +200,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); + bool is_wb = fc->writeback_cache; loff_t oldsize; struct timespec old_mtime; @@ -211,10 +215,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, fuse_change_attributes_common(inode, attr, attr_valid); oldsize = inode->i_size; - i_size_write(inode, attr->size); + /* + * In case of writeback_cache enabled, the cached writes beyond EOF + * extend local i_size without keeping userspace server in sync. So, + * attr->size coming from server can be stale. We cannot trust it. + */ + if (!is_wb || !S_ISREG(inode->i_mode)) + i_size_write(inode, attr->size); spin_unlock(&fc->lock); - if (S_ISREG(inode->i_mode)) { + if (!is_wb && S_ISREG(inode->i_mode)) { bool inval = false; if (oldsize != attr->size) { @@ -243,6 +253,10 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) { inode->i_mode = attr->mode & S_IFMT; inode->i_size = attr->size; + inode->i_mtime.tv_sec = attr->mtime; + inode->i_mtime.tv_nsec = attr->mtimensec; + inode->i_ctime.tv_sec = attr->ctime; + inode->i_ctime.tv_nsec = attr->ctimensec; if (S_ISREG(inode->i_mode)) { fuse_init_common(inode); fuse_init_file_inode(inode); @@ -289,7 +303,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, return NULL; if ((inode->i_state & I_NEW)) { - inode->i_flags |= S_NOATIME|S_NOCMTIME; + inode->i_flags |= S_NOATIME; + if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) + inode->i_flags |= S_NOCMTIME; inode->i_generation = generation; inode->i_data.backing_dev_info = &fc->bdi; fuse_init_inode(inode, attr); diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 8b9b3775e2e7..c41d255b6a7b 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -69,10 +69,12 @@ struct get_name_filldir { char *name; }; -static int get_name_filldir(void *opaque, const char *name, int length, - loff_t offset, u64 inum, unsigned int type) +static int get_name_filldir(struct dir_context *ctx, const char *name, + int length, loff_t offset, u64 inum, + unsigned int type) { - struct get_name_filldir *gnfd = opaque; + struct get_name_filldir *gnfd = + container_of(ctx, struct get_name_filldir, ctx); if (inum != gnfd->inum.no_addr) return 0; diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index 6e560d56094b..754fdf8c6356 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c @@ -131,13 +131,16 @@ skip: hfs_bnode_write(node, entry, data_off + key_len, entry_len); hfs_bnode_dump(node); - if (new_node) { - /* update parent key if we inserted a key - * at the start of the first node - */ - if (!rec && new_node != node) - hfs_brec_update_parent(fd); + /* + * update parent key if we inserted a key + * at the start of the node and it is not the new node + */ + if (!rec && new_node != node) { + hfs_bnode_read_key(node, fd->search_key, data_off + size); + hfs_brec_update_parent(fd); + } + if (new_node) { hfs_bnode_put(fd->bnode); if (!new_node->parent) { hfs_btree_inc_height(tree); @@ -168,9 +171,6 @@ skip: goto again; } - if (!rec) - hfs_brec_update_parent(fd); - return 0; } @@ -370,6 +370,8 @@ again: if (IS_ERR(parent)) return PTR_ERR(parent); __hfs_brec_find(parent, fd, hfs_find_rec_by_key); + if (fd->record < 0) + return -ENOENT; hfs_bnode_dump(parent); rec = fd->record; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 4338ff32959d..5f2755117ce7 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -548,10 +548,11 @@ struct hppfs_dirent { struct dentry *dentry; }; -static int hppfs_filldir(void *d, const char *name, int size, +static int hppfs_filldir(struct dir_context *ctx, const char *name, int size, loff_t offset, u64 inode, unsigned int type) { - struct hppfs_dirent *dirent = d; + struct hppfs_dirent *dirent = + container_of(ctx, struct hppfs_dirent, ctx); if (file_removed(dirent->dentry, name)) return 0; diff --git a/fs/internal.h b/fs/internal.h index 465742407466..dd41b12c13f1 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -42,7 +42,6 @@ extern void __init chrdev_init(void); /* * namei.c */ -extern int __inode_permission(struct inode *, int); extern int user_path_mountpoint_at(int, const char __user *, unsigned int, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); @@ -135,12 +134,6 @@ extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); extern int rw_verify_area(int, struct file *, const loff_t *, size_t); /* - * splice.c - */ -extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, - loff_t *opos, size_t len, unsigned int flags); - -/* * pipe.c */ extern const struct file_operations pipefifo_fops; diff --git a/fs/namei.c b/fs/namei.c index 0dd72c8e65fd..0fbf1504fb63 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -415,6 +415,7 @@ int __inode_permission(struct inode *inode, int mask) return security_inode_permission(inode, mask); } +EXPORT_SYMBOL(__inode_permission); /** * sb_permission - Check superblock-level permissions @@ -2359,22 +2360,17 @@ kern_path_mountpoint(int dfd, const char *name, struct path *path, } EXPORT_SYMBOL(kern_path_mountpoint); -/* - * It's inline, so penalty for filesystems that don't use sticky bit is - * minimal. - */ -static inline int check_sticky(struct inode *dir, struct inode *inode) +int __check_sticky(struct inode *dir, struct inode *inode) { kuid_t fsuid = current_fsuid(); - if (!(dir->i_mode & S_ISVTX)) - return 0; if (uid_eq(inode->i_uid, fsuid)) return 0; if (uid_eq(dir->i_uid, fsuid)) return 0; return !capable_wrt_inode_uidgid(inode, CAP_FOWNER); } +EXPORT_SYMBOL(__check_sticky); /* * Check whether we can remove a link victim from directory dir, check @@ -2477,7 +2473,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) } mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT2); return NULL; } @@ -3037,9 +3033,12 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto out; - file->f_path.mnt = nd->path.mnt; - error = finish_open(file, nd->path.dentry, NULL, opened); - if (error) { + + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ + error = vfs_open(&nd->path, file, current_cred()); + if (!error) { + *opened |= FILE_OPENED; + } else { if (error == -EOPENSTALE) goto stale_open; goto out; @@ -3977,7 +3976,28 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } -/* +/** + * vfs_rename - rename a filesystem object + * @old_dir: parent of source + * @old_dentry: source + * @new_dir: parent of destination + * @new_dentry: destination + * @delegated_inode: returns an inode needing a delegation break + * @flags: rename flags + * + * The caller must hold multiple mutexes--see lock_rename()). + * + * If vfs_rename discovers a delegation in need of breaking at either + * the source or destination, it will return -EWOULDBLOCK and return a + * reference to the inode in delegated_inode. The caller should then + * break the delegation and retry. Because breaking a delegation may + * take a long time, the caller should drop all locks before doing + * so. + * + * Alternatively, a caller may pass NULL for delegated_inode. This may + * be appropriate for callers that expect the underlying filesystem not + * to be NFS exported. + * * The worst of all namespace operations - renaming directory. "Perverted" * doesn't even start to describe it. Somebody in UCB had a heck of a trip... * Problems: @@ -4005,163 +4025,140 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname * ->i_mutex on parents, which works but leads to some truly excessive * locking]. */ -static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + struct inode **delegated_inode, unsigned int flags) { - int error = 0; + int error; + bool is_dir = d_is_dir(old_dentry); + const unsigned char *old_name; + struct inode *source = old_dentry->d_inode; struct inode *target = new_dentry->d_inode; + bool new_is_dir = false; unsigned max_links = new_dir->i_sb->s_max_links; + if (source == target) + return 0; + + error = may_delete(old_dir, old_dentry, is_dir); + if (error) + return error; + + if (!target) { + error = may_create(new_dir, new_dentry); + } else { + new_is_dir = d_is_dir(new_dentry); + + if (!(flags & RENAME_EXCHANGE)) + error = may_delete(new_dir, new_dentry, is_dir); + else + error = may_delete(new_dir, new_dentry, new_is_dir); + } + if (error) + return error; + + if (!old_dir->i_op->rename && !old_dir->i_op->rename2) + return -EPERM; + + if (flags && !old_dir->i_op->rename2) + return -EINVAL; + /* * If we are going to change the parent - check write permissions, * we'll need to flip '..'. */ if (new_dir != old_dir) { - error = inode_permission(old_dentry->d_inode, MAY_WRITE); - if (error) - return error; + if (is_dir) { + error = inode_permission(source, MAY_WRITE); + if (error) + return error; + } + if ((flags & RENAME_EXCHANGE) && new_is_dir) { + error = inode_permission(target, MAY_WRITE); + if (error) + return error; + } } - error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); + error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, + flags); if (error) return error; + old_name = fsnotify_oldname_init(old_dentry->d_name.name); dget(new_dentry); - if (target) + if (!is_dir || (flags & RENAME_EXCHANGE)) + lock_two_nondirectories(source, target); + else if (target) mutex_lock(&target->i_mutex); error = -EBUSY; if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; - error = -EMLINK; - if (max_links && !target && new_dir != old_dir && - new_dir->i_nlink >= max_links) - goto out; - - if (target) + if (max_links && new_dir != old_dir) { + error = -EMLINK; + if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) + goto out; + if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && + old_dir->i_nlink >= max_links) + goto out; + } + if (is_dir && !(flags & RENAME_EXCHANGE) && target) shrink_dcache_parent(new_dentry); - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - goto out; - - if (target) { - target->i_flags |= S_DEAD; - dont_mount(new_dentry); + if (!is_dir) { + error = try_break_deleg(source, delegated_inode); + if (error) + goto out; } -out: - if (target) - mutex_unlock(&target->i_mutex); - dput(new_dentry); - if (!error) - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry,new_dentry); - return error; -} - -static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - struct inode **delegated_inode) -{ - struct inode *target = new_dentry->d_inode; - struct inode *source = old_dentry->d_inode; - int error; - - error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); - if (error) - return error; - - dget(new_dentry); - lock_two_nondirectories(source, target); - - error = -EBUSY; - if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) - goto out; - - error = try_break_deleg(source, delegated_inode); - if (error) - goto out; - if (target) { + if (target && !new_is_dir) { error = try_break_deleg(target, delegated_inode); if (error) goto out; } - error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (!old_dir->i_op->rename2) { + error = old_dir->i_op->rename(old_dir, old_dentry, + new_dir, new_dentry); + } else { + WARN_ON(old_dir->i_op->rename != NULL); + error = old_dir->i_op->rename2(old_dir, old_dentry, + new_dir, new_dentry, flags); + } if (error) goto out; - if (target) + if (!(flags & RENAME_EXCHANGE) && target) { + if (is_dir) + target->i_flags |= S_DEAD; dont_mount(new_dentry); - if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) - d_move(old_dentry, new_dentry); + } + if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { + if (!(flags & RENAME_EXCHANGE)) + d_move(old_dentry, new_dentry); + else + d_exchange(old_dentry, new_dentry); + } out: - unlock_two_nondirectories(source, target); + if (!is_dir || (flags & RENAME_EXCHANGE)) + unlock_two_nondirectories(source, target); + else if (target) + mutex_unlock(&target->i_mutex); dput(new_dentry); - return error; -} - -/** - * vfs_rename - rename a filesystem object - * @old_dir: parent of source - * @old_dentry: source - * @new_dir: parent of destination - * @new_dentry: destination - * @delegated_inode: returns an inode needing a delegation break - * - * The caller must hold multiple mutexes--see lock_rename()). - * - * If vfs_rename discovers a delegation in need of breaking at either - * the source or destination, it will return -EWOULDBLOCK and return a - * reference to the inode in delegated_inode. The caller should then - * break the delegation and retry. Because breaking a delegation may - * take a long time, the caller should drop all locks before doing - * so. - * - * Alternatively, a caller may pass NULL for delegated_inode. This may - * be appropriate for callers that expect the underlying filesystem not - * to be NFS exported. - */ -int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry, - struct inode **delegated_inode) -{ - int error; - int is_dir = d_is_dir(old_dentry); - const unsigned char *old_name; - - if (old_dentry->d_inode == new_dentry->d_inode) - return 0; - - error = may_delete(old_dir, old_dentry, is_dir); - if (error) - return error; - - if (!new_dentry->d_inode) - error = may_create(new_dir, new_dentry); - else - error = may_delete(new_dir, new_dentry, is_dir); - if (error) - return error; - - if (!old_dir->i_op->rename) - return -EPERM; - - old_name = fsnotify_oldname_init(old_dentry->d_name.name); - - if (is_dir) - error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); - else - error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); - if (!error) + if (!error) { fsnotify_move(old_dir, new_dir, old_name, is_dir, - new_dentry->d_inode, old_dentry); + !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); + if (flags & RENAME_EXCHANGE) { + fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, + new_is_dir, NULL, new_dentry); + } + } fsnotify_oldname_free(old_name); return error; } -SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, - int, newdfd, const char __user *, newname) +SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname, unsigned int, flags) { struct dentry *old_dir, *new_dir; struct dentry *old_dentry, *new_dentry; @@ -4173,6 +4170,17 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, unsigned int lookup_flags = 0; bool should_retry = false; int error; + + if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT)) + return -EINVAL; + + if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) && + (flags & RENAME_EXCHANGE)) + return -EINVAL; + + if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) + return -EPERM; + retry: from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); if (IS_ERR(from)) { @@ -4196,6 +4204,8 @@ retry: goto exit2; new_dir = newnd.path.dentry; + if (flags & RENAME_NOREPLACE) + error = -EEXIST; if (newnd.last_type != LAST_NORM) goto exit2; @@ -4205,7 +4215,8 @@ retry: oldnd.flags &= ~LOOKUP_PARENT; newnd.flags &= ~LOOKUP_PARENT; - newnd.flags |= LOOKUP_RENAME_TARGET; + if (!(flags & RENAME_EXCHANGE)) + newnd.flags |= LOOKUP_RENAME_TARGET; retry_deleg: trap = lock_rename(new_dir, old_dir); @@ -4218,34 +4229,49 @@ retry_deleg: error = -ENOENT; if (d_is_negative(old_dentry)) goto exit4; + new_dentry = lookup_hash(&newnd); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto exit4; + error = -EEXIST; + if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) + goto exit5; + if (flags & RENAME_EXCHANGE) { + error = -ENOENT; + if (d_is_negative(new_dentry)) + goto exit5; + + if (!d_is_dir(new_dentry)) { + error = -ENOTDIR; + if (newnd.last.name[newnd.last.len]) + goto exit5; + } + } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; if (oldnd.last.name[oldnd.last.len]) - goto exit4; - if (newnd.last.name[newnd.last.len]) - goto exit4; + goto exit5; + if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) + goto exit5; } /* source should not be ancestor of target */ error = -EINVAL; if (old_dentry == trap) - goto exit4; - new_dentry = lookup_hash(&newnd); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; + goto exit5; /* target should not be an ancestor of source */ - error = -ENOTEMPTY; + if (!(flags & RENAME_EXCHANGE)) + error = -ENOTEMPTY; if (new_dentry == trap) goto exit5; error = security_path_rename(&oldnd.path, old_dentry, - &newnd.path, new_dentry); + &newnd.path, new_dentry, flags); if (error) goto exit5; error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry, - &delegated_inode); + new_dir->d_inode, new_dentry, + &delegated_inode, flags); exit5: dput(new_dentry); exit4: @@ -4275,16 +4301,34 @@ exit: return error; } +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname) +{ + return sys_renameat2(olddfd, oldname, newdfd, newname, 0); +} + SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { - return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); + return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } -int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) +int vfs_whiteout(struct inode *dir, struct dentry *dentry) { - int len; + int error = may_create(dir, dentry); + if (error) + return error; - len = PTR_ERR(link); + if (!dir->i_op->mknod) + return -EPERM; + + return dir->i_op->mknod(dir, dentry, + S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV); +} +EXPORT_SYMBOL(vfs_whiteout); + +int readlink_copy(char __user *buffer, int buflen, const char *link) +{ + int len = PTR_ERR(link); if (IS_ERR(link)) goto out; @@ -4313,7 +4357,7 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) if (IS_ERR(cookie)) return PTR_ERR(cookie); - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + res = readlink_copy(buffer, buflen, nd_get_link(&nd)); if (dentry->d_inode->i_op->put_link) dentry->d_inode->i_op->put_link(dentry, &nd, cookie); return res; @@ -4337,8 +4381,7 @@ static char *page_getlink(struct dentry * dentry, struct page **ppage) int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct page *page = NULL; - char *s = page_getlink(dentry, &page); - int res = vfs_readlink(dentry,buffer,buflen,s); + int res = readlink_copy(buffer, buflen, page_getlink(dentry, &page)); if (page) { kunmap(page); page_cache_release(page); @@ -4434,7 +4477,6 @@ EXPORT_SYMBOL(vfs_link); EXPORT_SYMBOL(vfs_mkdir); EXPORT_SYMBOL(vfs_mknod); EXPORT_SYMBOL(generic_permission); -EXPORT_SYMBOL(vfs_readlink); EXPORT_SYMBOL(vfs_rename); EXPORT_SYMBOL(vfs_rmdir); EXPORT_SYMBOL(vfs_symlink); diff --git a/fs/namespace.c b/fs/namespace.c index 7764b1a3bd24..546122367c81 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1590,6 +1590,33 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } +/** + * clone_private_mount - create a private clone of a path + * + * This creates a new vfsmount, which will be the clone of @path. The new will + * not be attached anywhere in the namespace and will be private (i.e. changes + * to the originating mount won't be propagated into this). + * + * Release with mntput(). + */ +struct vfsmount *clone_private_mount(struct path *path) +{ + struct mount *old_mnt = real_mount(path->mnt); + struct mount *new_mnt; + + if (IS_MNT_UNBINDABLE(old_mnt)) + return ERR_PTR(-EINVAL); + + down_read(&namespace_sem); + new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); + up_read(&namespace_sem); + if (IS_ERR(new_mnt)) + return ERR_CAST(new_mnt); + + return &new_mnt->mnt; +} +EXPORT_SYMBOL_GPL(clone_private_mount); + int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg, struct vfsmount *root) { diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 9c271f42604a..674a5d529e09 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -244,10 +244,11 @@ struct nfs4_dir_ctx { }; static int -nfsd4_build_namelist(void *arg, const char *name, int namlen, +nfsd4_build_namelist(struct dir_context *__ctx, const char *name, int namlen, loff_t offset, u64 ino, unsigned int d_type) { - struct nfs4_dir_ctx *ctx = arg; + struct nfs4_dir_ctx *ctx = + container_of(__ctx, struct nfs4_dir_ctx, ctx); struct name_list *entry; if (namlen != HEXDIR_LEN - 1) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eea5ad188984..3953a20db16d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1703,7 +1703,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) goto out_dput_new; - host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); + host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); if (!host_err) { host_err = commit_metadata(tfhp); if (!host_err) @@ -1808,10 +1808,12 @@ struct readdir_data { int full; }; -static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, - loff_t offset, u64 ino, unsigned int d_type) +static int nfsd_buffered_filldir(struct dir_context *ctx, const char *name, + int namlen, loff_t offset, u64 ino, + unsigned int d_type) { - struct readdir_data *buf = __buf; + struct readdir_data *buf = + container_of(ctx, struct readdir_data, ctx); struct buffered_dirent *de = (void *)(buf->dirent + buf->used); unsigned int reclen; @@ -1831,7 +1833,7 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, return 0; } -static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, +static __be32 nfsd_buffered_readdir(struct file *file, nfsd_filldir_t func, struct readdir_cd *cdp, loff_t *offsetp) { struct buffered_dirent *de; @@ -1915,7 +1917,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, */ __be32 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, - struct readdir_cd *cdp, filldir_t func) + struct readdir_cd *cdp, nfsd_filldir_t func) { __be32 err; struct file *file; diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index fbe90bdb2214..ea760b17ecbd 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -36,7 +36,7 @@ /* * Callback function for readdir */ -typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); +typedef int (*nfsd_filldir_t)(void *, const char *, int, loff_t, u64, unsigned); /* nfsd/vfs.c */ int nfsd_racache_init(int); @@ -89,7 +89,7 @@ __be32 nfsd_rename(struct svc_rqst *, __be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, - loff_t *, struct readdir_cd *, filldir_t); + loff_t *, struct readdir_cd *, nfsd_filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 5bee81674d53..14538a865102 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1906,6 +1906,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_inode_info *ii, *n; + int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE); int defer_iput = false; spin_lock(&nilfs->ns_inode_lock); @@ -1918,10 +1919,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci, brelse(ii->i_bh); ii->i_bh = NULL; list_del_init(&ii->i_dirty); - if (!ii->vfs_inode.i_nlink) { + if (!ii->vfs_inode.i_nlink || during_mount) { /* - * Defer calling iput() to avoid a deadlock - * over I_SYNC flag for inodes with i_nlink == 0 + * Defer calling iput() to avoid deadlocks if + * i_nlink == 0 or mount is not yet finished. */ list_add_tail(&ii->i_dirty, &sci->sc_iput_queue); defer_iput = true; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 91a7e85ac8fd..478e14d7be54 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -2073,10 +2073,12 @@ struct ocfs2_empty_dir_priv { unsigned seen_other; unsigned dx_dir; }; -static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len, - loff_t pos, u64 ino, unsigned type) +static int ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name, + int name_len, loff_t pos, u64 ino, + unsigned type) { - struct ocfs2_empty_dir_priv *p = priv; + struct ocfs2_empty_dir_priv *p = + container_of(ctx, struct ocfs2_empty_dir_priv, ctx); /* * Check the positions of "." and ".." records to be sure diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 51632c40e896..7fe30f655aa5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2391,10 +2391,14 @@ out_dio: /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); + if (unlikely(written <= 0)) + goto no_sync; + if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); if (ret < 0) written = ret; @@ -2407,10 +2411,12 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, *ppos, - *ppos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, + iocb->ki_pos - written, + iocb->ki_pos - 1); } +no_sync: /* * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io * function pointer which is called when o_direct io completes so that diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 44fc3e530c3d..a1b7dca075dd 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1981,10 +1981,12 @@ struct ocfs2_orphan_filldir_priv { struct ocfs2_super *osb; }; -static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, - loff_t pos, u64 ino, unsigned type) +static int ocfs2_orphan_filldir(struct dir_context *ctx, const char *name, + int name_len, loff_t pos, u64 ino, + unsigned type) { - struct ocfs2_orphan_filldir_priv *p = priv; + struct ocfs2_orphan_filldir_priv *p = + container_of(ctx, struct ocfs2_orphan_filldir_priv, ctx); struct inode *iter; if (name_len == 1 && !strncmp(".", name, 1)) diff --git a/fs/open.c b/fs/open.c index 2ed7325f713e..c92c6ef8bf6d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -822,8 +822,7 @@ struct file *dentry_open(const struct path *path, int flags, f = get_empty_filp(); if (!IS_ERR(f)) { f->f_flags = flags; - f->f_path = *path; - error = do_dentry_open(f, NULL, cred); + error = vfs_open(path, f, cred); if (!error) { /* from now on we need fput() to dispose of f */ error = open_check_o_direct(f); @@ -840,6 +839,26 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +/** + * vfs_open - open the file at the given path + * @path: path to open + * @filp: newly allocated file with f_flag initialized + * @cred: credentials to use + */ +int vfs_open(const struct path *path, struct file *filp, + const struct cred *cred) +{ + struct inode *inode = path->dentry->d_inode; + + if (inode->i_op->dentry_open) + return inode->i_op->dentry_open(path->dentry, filp, cred); + else { + filp->f_path = *path; + return do_dentry_open(filp, NULL, cred); + } +} +EXPORT_SYMBOL(vfs_open); + static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op) { int lookup_flags = 0; diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig new file mode 100644 index 000000000000..34355818a2e0 --- /dev/null +++ b/fs/overlayfs/Kconfig @@ -0,0 +1,10 @@ +config OVERLAY_FS + tristate "Overlay filesystem support" + help + An overlay filesystem combines two filesystems - an 'upper' filesystem + and a 'lower' filesystem. When a name exists in both filesystems, the + object in the 'upper' filesystem is visible while the object in the + 'lower' filesystem is either hidden or, in the case of directories, + merged with the 'upper' object. + + For more information see Documentation/filesystems/overlayfs.txt diff --git a/fs/overlayfs/Makefile b/fs/overlayfs/Makefile new file mode 100644 index 000000000000..900daed3e91d --- /dev/null +++ b/fs/overlayfs/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the overlay filesystem. +# + +obj-$(CONFIG_OVERLAY_FS) += overlay.o + +overlay-objs := super.o inode.o dir.o readdir.o copy_up.o diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c new file mode 100644 index 000000000000..24f640441bd9 --- /dev/null +++ b/fs/overlayfs/copy_up.c @@ -0,0 +1,413 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/file.h> +#include <linux/splice.h> +#include <linux/xattr.h> +#include <linux/security.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <linux/namei.h> +#include "overlayfs.h" + +#define OVL_COPY_UP_CHUNK_SIZE (1 << 20) + +int ovl_copy_xattr(struct dentry *old, struct dentry *new) +{ + ssize_t list_size, size; + char *buf, *name, *value; + int error; + + if (!old->d_inode->i_op->getxattr || + !new->d_inode->i_op->getxattr) + return 0; + + list_size = vfs_listxattr(old, NULL, 0); + if (list_size <= 0) { + if (list_size == -EOPNOTSUPP) + return 0; + return list_size; + } + + buf = kzalloc(list_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + error = -ENOMEM; + value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL); + if (!value) + goto out; + + list_size = vfs_listxattr(old, buf, list_size); + if (list_size <= 0) { + error = list_size; + goto out_free_value; + } + + for (name = buf; name < (buf + list_size); name += strlen(name) + 1) { + size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX); + if (size <= 0) { + error = size; + goto out_free_value; + } + error = vfs_setxattr(new, name, value, size, 0); + if (error) + goto out_free_value; + } + +out_free_value: + kfree(value); +out: + kfree(buf); + return error; +} + +static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len) +{ + struct file *old_file; + struct file *new_file; + loff_t old_pos = 0; + loff_t new_pos = 0; + int error = 0; + + if (len == 0) + return 0; + + old_file = ovl_path_open(old, O_RDONLY); + if (IS_ERR(old_file)) + return PTR_ERR(old_file); + + new_file = ovl_path_open(new, O_WRONLY); + if (IS_ERR(new_file)) { + error = PTR_ERR(new_file); + goto out_fput; + } + + /* FIXME: copy up sparse files efficiently */ + while (len) { + size_t this_len = OVL_COPY_UP_CHUNK_SIZE; + long bytes; + + if (len < this_len) + this_len = len; + + if (signal_pending_state(TASK_KILLABLE, current)) { + error = -EINTR; + break; + } + + bytes = do_splice_direct(old_file, &old_pos, + new_file, &new_pos, + this_len, SPLICE_F_MOVE); + if (bytes <= 0) { + error = bytes; + break; + } + WARN_ON(old_pos != new_pos); + + len -= bytes; + } + + fput(new_file); +out_fput: + fput(old_file); + return error; +} + +static char *ovl_read_symlink(struct dentry *realdentry) +{ + int res; + char *buf; + struct inode *inode = realdentry->d_inode; + mm_segment_t old_fs; + + res = -EINVAL; + if (!inode->i_op->readlink) + goto err; + + res = -ENOMEM; + buf = (char *) __get_free_page(GFP_KERNEL); + if (!buf) + goto err; + + old_fs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + res = inode->i_op->readlink(realdentry, + (char __user *)buf, PAGE_SIZE - 1); + set_fs(old_fs); + if (res < 0) { + free_page((unsigned long) buf); + goto err; + } + buf[res] = '\0'; + + return buf; + +err: + return ERR_PTR(res); +} + +static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat) +{ + struct iattr attr = { + .ia_valid = + ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET, + .ia_atime = stat->atime, + .ia_mtime = stat->mtime, + }; + + return notify_change(upperdentry, &attr, NULL); +} + +int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) +{ + int err = 0; + + if (!S_ISLNK(stat->mode)) { + struct iattr attr = { + .ia_valid = ATTR_MODE, + .ia_mode = stat->mode, + }; + err = notify_change(upperdentry, &attr, NULL); + } + if (!err) { + struct iattr attr = { + .ia_valid = ATTR_UID | ATTR_GID, + .ia_uid = stat->uid, + .ia_gid = stat->gid, + }; + err = notify_change(upperdentry, &attr, NULL); + } + if (!err) + ovl_set_timestamps(upperdentry, stat); + + return err; +} + +static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, + struct dentry *dentry, struct path *lowerpath, + struct kstat *stat, struct iattr *attr, + const char *link) +{ + struct inode *wdir = workdir->d_inode; + struct inode *udir = upperdir->d_inode; + struct dentry *newdentry = NULL; + struct dentry *upper = NULL; + umode_t mode = stat->mode; + int err; + + newdentry = ovl_lookup_temp(workdir, dentry); + err = PTR_ERR(newdentry); + if (IS_ERR(newdentry)) + goto out; + + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) + goto out1; + + /* Can't properly set mode on creation because of the umask */ + stat->mode &= S_IFMT; + err = ovl_create_real(wdir, newdentry, stat, link, NULL, true); + stat->mode = mode; + if (err) + goto out2; + + if (S_ISREG(stat->mode)) { + struct path upperpath; + ovl_path_upper(dentry, &upperpath); + BUG_ON(upperpath.dentry != NULL); + upperpath.dentry = newdentry; + + err = ovl_copy_up_data(lowerpath, &upperpath, stat->size); + if (err) + goto out_cleanup; + } + + err = ovl_copy_xattr(lowerpath->dentry, newdentry); + if (err) + goto out_cleanup; + + mutex_lock(&newdentry->d_inode->i_mutex); + err = ovl_set_attr(newdentry, stat); + if (!err && attr) + err = notify_change(newdentry, attr, NULL); + mutex_unlock(&newdentry->d_inode->i_mutex); + if (err) + goto out_cleanup; + + err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + if (err) + goto out_cleanup; + + ovl_dentry_update(dentry, newdentry); + newdentry = NULL; + + /* + * Non-directores become opaque when copied up. + */ + if (!S_ISDIR(stat->mode)) + ovl_dentry_set_opaque(dentry, true); +out2: + dput(upper); +out1: + dput(newdentry); +out: + return err; + +out_cleanup: + ovl_cleanup(wdir, newdentry); + goto out; +} + +/* + * Copy up a single dentry + * + * Directory renames only allowed on "pure upper" (already created on + * upper filesystem, never copied up). Directories which are on lower or + * are merged may not be renamed. For these -EXDEV is returned and + * userspace has to deal with it. This means, when copying up a + * directory we can rely on it and ancestors being stable. + * + * Non-directory renames start with copy up of source if necessary. The + * actual rename will only proceed once the copy up was successful. Copy + * up uses upper parent i_mutex for exclusion. Since rename can change + * d_parent it is possible that the copy up will lock the old parent. At + * that point the file will have already been copied up anyway. + */ +int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, + struct path *lowerpath, struct kstat *stat, + struct iattr *attr) +{ + struct dentry *workdir = ovl_workdir(dentry); + int err; + struct kstat pstat; + struct path parentpath; + struct dentry *upperdir; + struct dentry *upperdentry; + const struct cred *old_cred; + struct cred *override_cred; + char *link = NULL; + + ovl_path_upper(parent, &parentpath); + upperdir = parentpath.dentry; + + err = vfs_getattr(&parentpath, &pstat); + if (err) + return err; + + if (S_ISLNK(stat->mode)) { + link = ovl_read_symlink(lowerpath->dentry); + if (IS_ERR(link)) + return PTR_ERR(link); + } + + err = -ENOMEM; + override_cred = prepare_creds(); + if (!override_cred) + goto out_free_link; + + override_cred->fsuid = stat->uid; + override_cred->fsgid = stat->gid; + /* + * CAP_SYS_ADMIN for copying up extended attributes + * CAP_DAC_OVERRIDE for create + * CAP_FOWNER for chmod, timestamp update + * CAP_FSETID for chmod + * CAP_CHOWN for chown + * CAP_MKNOD for mknod + */ + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + cap_raise(override_cred->cap_effective, CAP_FOWNER); + cap_raise(override_cred->cap_effective, CAP_FSETID); + cap_raise(override_cred->cap_effective, CAP_CHOWN); + cap_raise(override_cred->cap_effective, CAP_MKNOD); + old_cred = override_creds(override_cred); + + err = -EIO; + if (lock_rename(workdir, upperdir) != NULL) { + pr_err("overlayfs: failed to lock workdir+upperdir\n"); + goto out_unlock; + } + upperdentry = ovl_dentry_upper(dentry); + if (upperdentry) { + unlock_rename(workdir, upperdir); + err = 0; + /* Raced with another copy-up? Do the setattr here */ + if (attr) { + mutex_lock(&upperdentry->d_inode->i_mutex); + err = notify_change(upperdentry, attr, NULL); + mutex_unlock(&upperdentry->d_inode->i_mutex); + } + goto out_put_cred; + } + + err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, + stat, attr, link); + if (!err) { + /* Restore timestamps on parent (best effort) */ + ovl_set_timestamps(upperdir, &pstat); + } +out_unlock: + unlock_rename(workdir, upperdir); +out_put_cred: + revert_creds(old_cred); + put_cred(override_cred); + +out_free_link: + if (link) + free_page((unsigned long) link); + + return err; +} + +int ovl_copy_up(struct dentry *dentry) +{ + int err; + + err = 0; + while (!err) { + struct dentry *next; + struct dentry *parent; + struct path lowerpath; + struct kstat stat; + enum ovl_path_type type = ovl_path_type(dentry); + + if (OVL_TYPE_UPPER(type)) + break; + + next = dget(dentry); + /* find the topmost dentry not yet copied up */ + for (;;) { + parent = dget_parent(next); + + type = ovl_path_type(parent); + if (OVL_TYPE_UPPER(type)) + break; + + dput(next); + next = parent; + } + + ovl_path_lower(next, &lowerpath); + err = vfs_getattr(&lowerpath, &stat); + if (!err) + err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); + + dput(parent); + dput(next); + } + + return err; +} diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c new file mode 100644 index 000000000000..0dc4c33a0a1b --- /dev/null +++ b/fs/overlayfs/dir.c @@ -0,0 +1,928 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/xattr.h> +#include <linux/security.h> +#include <linux/cred.h> +#include "overlayfs.h" + +void ovl_cleanup(struct inode *wdir, struct dentry *wdentry) +{ + int err; + + dget(wdentry); + if (S_ISDIR(wdentry->d_inode->i_mode)) + err = ovl_do_rmdir(wdir, wdentry); + else + err = ovl_do_unlink(wdir, wdentry); + dput(wdentry); + + if (err) { + pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n", + wdentry, err); + } +} + +struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry) +{ + struct dentry *temp; + char name[20]; + + snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry); + + temp = lookup_one_len(name, workdir, strlen(name)); + if (!IS_ERR(temp) && temp->d_inode) { + pr_err("overlayfs: workdir/%s already exists\n", name); + dput(temp); + temp = ERR_PTR(-EIO); + } + + return temp; +} + +/* caller holds i_mutex on workdir */ +static struct dentry *ovl_whiteout(struct dentry *workdir, + struct dentry *dentry) +{ + int err; + struct dentry *whiteout; + struct inode *wdir = workdir->d_inode; + + whiteout = ovl_lookup_temp(workdir, dentry); + if (IS_ERR(whiteout)) + return whiteout; + + err = ovl_do_whiteout(wdir, whiteout); + if (err) { + dput(whiteout); + whiteout = ERR_PTR(err); + } + + return whiteout; +} + +int ovl_create_real(struct inode *dir, struct dentry *newdentry, + struct kstat *stat, const char *link, + struct dentry *hardlink, bool debug) +{ + int err; + + if (newdentry->d_inode) + return -ESTALE; + + if (hardlink) { + err = ovl_do_link(hardlink, dir, newdentry, debug); + } else { + switch (stat->mode & S_IFMT) { + case S_IFREG: + err = ovl_do_create(dir, newdentry, stat->mode, debug); + break; + + case S_IFDIR: + err = ovl_do_mkdir(dir, newdentry, stat->mode, debug); + break; + + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + err = ovl_do_mknod(dir, newdentry, + stat->mode, stat->rdev, debug); + break; + + case S_IFLNK: + err = ovl_do_symlink(dir, newdentry, link, debug); + break; + + default: + err = -EPERM; + } + } + if (!err && WARN_ON(!newdentry->d_inode)) { + /* + * Not quite sure if non-instantiated dentry is legal or not. + * VFS doesn't seem to care so check and warn here. + */ + err = -ENOENT; + } + return err; +} + +static int ovl_set_opaque(struct dentry *upperdentry) +{ + return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0); +} + +static void ovl_remove_opaque(struct dentry *upperdentry) +{ + int err; + + err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE); + if (err) { + pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n", + upperdentry->d_name.name, err); + } +} + +static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + int err; + enum ovl_path_type type; + struct path realpath; + + type = ovl_path_real(dentry, &realpath); + err = vfs_getattr(&realpath, stat); + if (err) + return err; + + stat->dev = dentry->d_sb->s_dev; + stat->ino = dentry->d_inode->i_ino; + + /* + * It's probably not worth it to count subdirs to get the + * correct link count. nlink=1 seems to pacify 'find' and + * other utilities. + */ + if (OVL_TYPE_MERGE(type)) + stat->nlink = 1; + + return 0; +} + +static int ovl_create_upper(struct dentry *dentry, struct inode *inode, + struct kstat *stat, const char *link, + struct dentry *hardlink) +{ + struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct inode *udir = upperdir->d_inode; + struct dentry *newdentry; + int err; + + mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT); + newdentry = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(newdentry); + if (IS_ERR(newdentry)) + goto out_unlock; + err = ovl_create_real(udir, newdentry, stat, link, hardlink, false); + if (err) + goto out_dput; + + ovl_dentry_version_inc(dentry->d_parent); + ovl_dentry_update(dentry, newdentry); + ovl_copyattr(newdentry->d_inode, inode); + d_instantiate(dentry, inode); + newdentry = NULL; +out_dput: + dput(newdentry); +out_unlock: + mutex_unlock(&udir->i_mutex); + return err; +} + +static int ovl_lock_rename_workdir(struct dentry *workdir, + struct dentry *upperdir) +{ + /* Workdir should not be the same as upperdir */ + if (workdir == upperdir) + goto err; + + /* Workdir should not be subdir of upperdir and vice versa */ + if (lock_rename(workdir, upperdir) != NULL) + goto err_unlock; + + return 0; + +err_unlock: + unlock_rename(workdir, upperdir); +err: + pr_err("overlayfs: failed to lock workdir+upperdir\n"); + return -EIO; +} + +static struct dentry *ovl_clear_empty(struct dentry *dentry, + struct list_head *list) +{ + struct dentry *workdir = ovl_workdir(dentry); + struct inode *wdir = workdir->d_inode; + struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct inode *udir = upperdir->d_inode; + struct path upperpath; + struct dentry *upper; + struct dentry *opaquedir; + struct kstat stat; + int err; + + err = ovl_lock_rename_workdir(workdir, upperdir); + if (err) + goto out; + + ovl_path_upper(dentry, &upperpath); + err = vfs_getattr(&upperpath, &stat); + if (err) + goto out_unlock; + + err = -ESTALE; + if (!S_ISDIR(stat.mode)) + goto out_unlock; + upper = upperpath.dentry; + if (upper->d_parent->d_inode != udir) + goto out_unlock; + + opaquedir = ovl_lookup_temp(workdir, dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out_unlock; + + err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true); + if (err) + goto out_dput; + + err = ovl_copy_xattr(upper, opaquedir); + if (err) + goto out_cleanup; + + err = ovl_set_opaque(opaquedir); + if (err) + goto out_cleanup; + + mutex_lock(&opaquedir->d_inode->i_mutex); + err = ovl_set_attr(opaquedir, &stat); + mutex_unlock(&opaquedir->d_inode->i_mutex); + if (err) + goto out_cleanup; + + err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE); + if (err) + goto out_cleanup; + + ovl_cleanup_whiteouts(upper, list); + ovl_cleanup(wdir, upper); + unlock_rename(workdir, upperdir); + + /* dentry's upper doesn't match now, get rid of it */ + d_drop(dentry); + + return opaquedir; + +out_cleanup: + ovl_cleanup(wdir, opaquedir); +out_dput: + dput(opaquedir); +out_unlock: + unlock_rename(workdir, upperdir); +out: + return ERR_PTR(err); +} + +static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry) +{ + int err; + struct dentry *ret = NULL; + LIST_HEAD(list); + + err = ovl_check_empty_dir(dentry, &list); + if (err) + ret = ERR_PTR(err); + else { + /* + * If no upperdentry then skip clearing whiteouts. + * + * Can race with copy-up, since we don't hold the upperdir + * mutex. Doesn't matter, since copy-up can't create a + * non-empty directory from an empty one. + */ + if (ovl_dentry_upper(dentry)) + ret = ovl_clear_empty(dentry, &list); + } + + ovl_cache_free(&list); + + return ret; +} + +static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, + struct kstat *stat, const char *link, + struct dentry *hardlink) +{ + struct dentry *workdir = ovl_workdir(dentry); + struct inode *wdir = workdir->d_inode; + struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct inode *udir = upperdir->d_inode; + struct dentry *upper; + struct dentry *newdentry; + int err; + + err = ovl_lock_rename_workdir(workdir, upperdir); + if (err) + goto out; + + newdentry = ovl_lookup_temp(workdir, dentry); + err = PTR_ERR(newdentry); + if (IS_ERR(newdentry)) + goto out_unlock; + + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) + goto out_dput; + + err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true); + if (err) + goto out_dput2; + + if (S_ISDIR(stat->mode)) { + err = ovl_set_opaque(newdentry); + if (err) + goto out_cleanup; + + err = ovl_do_rename(wdir, newdentry, udir, upper, + RENAME_EXCHANGE); + if (err) + goto out_cleanup; + + ovl_cleanup(wdir, upper); + } else { + err = ovl_do_rename(wdir, newdentry, udir, upper, 0); + if (err) + goto out_cleanup; + } + ovl_dentry_version_inc(dentry->d_parent); + ovl_dentry_update(dentry, newdentry); + ovl_copyattr(newdentry->d_inode, inode); + d_instantiate(dentry, inode); + newdentry = NULL; +out_dput2: + dput(upper); +out_dput: + dput(newdentry); +out_unlock: + unlock_rename(workdir, upperdir); +out: + return err; + +out_cleanup: + ovl_cleanup(wdir, newdentry); + goto out_dput2; +} + +static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev, + const char *link, struct dentry *hardlink) +{ + int err; + struct inode *inode; + struct kstat stat = { + .mode = mode, + .rdev = rdev, + }; + + err = -ENOMEM; + inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata); + if (!inode) + goto out; + + err = ovl_copy_up(dentry->d_parent); + if (err) + goto out_iput; + + if (!ovl_dentry_is_opaque(dentry)) { + err = ovl_create_upper(dentry, inode, &stat, link, hardlink); + } else { + const struct cred *old_cred; + struct cred *override_cred; + + err = -ENOMEM; + override_cred = prepare_creds(); + if (!override_cred) + goto out_iput; + + /* + * CAP_SYS_ADMIN for setting opaque xattr + * CAP_DAC_OVERRIDE for create in workdir, rename + * CAP_FOWNER for removing whiteout from sticky dir + */ + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + cap_raise(override_cred->cap_effective, CAP_FOWNER); + old_cred = override_creds(override_cred); + + err = ovl_create_over_whiteout(dentry, inode, &stat, link, + hardlink); + + revert_creds(old_cred); + put_cred(override_cred); + } + + if (!err) + inode = NULL; +out_iput: + iput(inode); +out: + return err; +} + +static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev, + const char *link) +{ + int err; + + err = ovl_want_write(dentry); + if (!err) { + err = ovl_create_or_link(dentry, mode, rdev, link, NULL); + ovl_drop_write(dentry); + } + + return err; +} + +static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL); +} + +static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL); +} + +static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, + dev_t rdev) +{ + /* Don't allow creation of "whiteout" on overlay */ + if (S_ISCHR(mode) && rdev == WHITEOUT_DEV) + return -EPERM; + + return ovl_create_object(dentry, mode, rdev, NULL); +} + +static int ovl_symlink(struct inode *dir, struct dentry *dentry, + const char *link) +{ + return ovl_create_object(dentry, S_IFLNK, 0, link); +} + +static int ovl_link(struct dentry *old, struct inode *newdir, + struct dentry *new) +{ + int err; + struct dentry *upper; + + err = ovl_want_write(old); + if (err) + goto out; + + err = ovl_copy_up(old); + if (err) + goto out_drop_write; + + upper = ovl_dentry_upper(old); + err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper); + +out_drop_write: + ovl_drop_write(old); +out: + return err; +} + +static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) +{ + struct dentry *workdir = ovl_workdir(dentry); + struct inode *wdir = workdir->d_inode; + struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct inode *udir = upperdir->d_inode; + struct dentry *whiteout; + struct dentry *upper; + struct dentry *opaquedir = NULL; + int err; + + if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { + opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out; + } + + err = ovl_lock_rename_workdir(workdir, upperdir); + if (err) + goto out_dput; + + whiteout = ovl_whiteout(workdir, dentry); + err = PTR_ERR(whiteout); + if (IS_ERR(whiteout)) + goto out_unlock; + + upper = ovl_dentry_upper(dentry); + if (!upper) { + upper = lookup_one_len(dentry->d_name.name, upperdir, + dentry->d_name.len); + err = PTR_ERR(upper); + if (IS_ERR(upper)) + goto kill_whiteout; + + err = ovl_do_rename(wdir, whiteout, udir, upper, 0); + dput(upper); + if (err) + goto kill_whiteout; + } else { + int flags = 0; + + if (opaquedir) + upper = opaquedir; + err = -ESTALE; + if (upper->d_parent != upperdir) + goto kill_whiteout; + + if (is_dir) + flags |= RENAME_EXCHANGE; + + err = ovl_do_rename(wdir, whiteout, udir, upper, flags); + if (err) + goto kill_whiteout; + + if (is_dir) + ovl_cleanup(wdir, upper); + } + ovl_dentry_version_inc(dentry->d_parent); +out_d_drop: + d_drop(dentry); + dput(whiteout); +out_unlock: + unlock_rename(workdir, upperdir); +out_dput: + dput(opaquedir); +out: + return err; + +kill_whiteout: + ovl_cleanup(wdir, whiteout); + goto out_d_drop; +} + +static int ovl_remove_upper(struct dentry *dentry, bool is_dir) +{ + struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent); + struct inode *dir = upperdir->d_inode; + struct dentry *upper = ovl_dentry_upper(dentry); + int err; + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); + err = -ESTALE; + if (upper->d_parent == upperdir) { + /* Don't let d_delete() think it can reset d_inode */ + dget(upper); + if (is_dir) + err = vfs_rmdir(dir, upper); + else + err = vfs_unlink(dir, upper, NULL); + dput(upper); + ovl_dentry_version_inc(dentry->d_parent); + } + + /* + * Keeping this dentry hashed would mean having to release + * upperpath/lowerpath, which could only be done if we are the + * sole user of this dentry. Too tricky... Just unhash for + * now. + */ + d_drop(dentry); + mutex_unlock(&dir->i_mutex); + + return err; +} + +static inline int ovl_check_sticky(struct dentry *dentry) +{ + struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode; + struct inode *inode = ovl_dentry_real(dentry)->d_inode; + + if (check_sticky(dir, inode)) + return -EPERM; + + return 0; +} + +static int ovl_do_remove(struct dentry *dentry, bool is_dir) +{ + enum ovl_path_type type; + int err; + + err = ovl_check_sticky(dentry); + if (err) + goto out; + + err = ovl_want_write(dentry); + if (err) + goto out; + + err = ovl_copy_up(dentry->d_parent); + if (err) + goto out_drop_write; + + type = ovl_path_type(dentry); + if (OVL_TYPE_PURE_UPPER(type)) { + err = ovl_remove_upper(dentry, is_dir); + } else { + const struct cred *old_cred; + struct cred *override_cred; + + err = -ENOMEM; + override_cred = prepare_creds(); + if (!override_cred) + goto out_drop_write; + + /* + * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir + * CAP_DAC_OVERRIDE for create in workdir, rename + * CAP_FOWNER for removing whiteout from sticky dir + * CAP_FSETID for chmod of opaque dir + * CAP_CHOWN for chown of opaque dir + */ + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + cap_raise(override_cred->cap_effective, CAP_FOWNER); + cap_raise(override_cred->cap_effective, CAP_FSETID); + cap_raise(override_cred->cap_effective, CAP_CHOWN); + old_cred = override_creds(override_cred); + + err = ovl_remove_and_whiteout(dentry, is_dir); + + revert_creds(old_cred); + put_cred(override_cred); + } +out_drop_write: + ovl_drop_write(dentry); +out: + return err; +} + +static int ovl_unlink(struct inode *dir, struct dentry *dentry) +{ + return ovl_do_remove(dentry, false); +} + +static int ovl_rmdir(struct inode *dir, struct dentry *dentry) +{ + return ovl_do_remove(dentry, true); +} + +static int ovl_rename2(struct inode *olddir, struct dentry *old, + struct inode *newdir, struct dentry *new, + unsigned int flags) +{ + int err; + enum ovl_path_type old_type; + enum ovl_path_type new_type; + struct dentry *old_upperdir; + struct dentry *new_upperdir; + struct dentry *olddentry; + struct dentry *newdentry; + struct dentry *trap; + bool old_opaque; + bool new_opaque; + bool new_create = false; + bool cleanup_whiteout = false; + bool overwrite = !(flags & RENAME_EXCHANGE); + bool is_dir = S_ISDIR(old->d_inode->i_mode); + bool new_is_dir = false; + struct dentry *opaquedir = NULL; + const struct cred *old_cred = NULL; + struct cred *override_cred = NULL; + + err = -EINVAL; + if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE)) + goto out; + + flags &= ~RENAME_NOREPLACE; + + err = ovl_check_sticky(old); + if (err) + goto out; + + /* Don't copy up directory trees */ + old_type = ovl_path_type(old); + err = -EXDEV; + if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir) + goto out; + + if (new->d_inode) { + err = ovl_check_sticky(new); + if (err) + goto out; + + if (S_ISDIR(new->d_inode->i_mode)) + new_is_dir = true; + + new_type = ovl_path_type(new); + err = -EXDEV; + if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) + goto out; + + err = 0; + if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) { + if (ovl_dentry_lower(old)->d_inode == + ovl_dentry_lower(new)->d_inode) + goto out; + } + if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) { + if (ovl_dentry_upper(old)->d_inode == + ovl_dentry_upper(new)->d_inode) + goto out; + } + } else { + if (ovl_dentry_is_opaque(new)) + new_type = __OVL_PATH_UPPER; + else + new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE; + } + + err = ovl_want_write(old); + if (err) + goto out; + + err = ovl_copy_up(old); + if (err) + goto out_drop_write; + + err = ovl_copy_up(new->d_parent); + if (err) + goto out_drop_write; + if (!overwrite) { + err = ovl_copy_up(new); + if (err) + goto out_drop_write; + } + + old_opaque = !OVL_TYPE_PURE_UPPER(old_type); + new_opaque = !OVL_TYPE_PURE_UPPER(new_type); + + if (old_opaque || new_opaque) { + err = -ENOMEM; + override_cred = prepare_creds(); + if (!override_cred) + goto out_drop_write; + + /* + * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir + * CAP_DAC_OVERRIDE for create in workdir + * CAP_FOWNER for removing whiteout from sticky dir + * CAP_FSETID for chmod of opaque dir + * CAP_CHOWN for chown of opaque dir + */ + cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN); + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + cap_raise(override_cred->cap_effective, CAP_FOWNER); + cap_raise(override_cred->cap_effective, CAP_FSETID); + cap_raise(override_cred->cap_effective, CAP_CHOWN); + old_cred = override_creds(override_cred); + } + + if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) { + opaquedir = ovl_check_empty_and_clear(new); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) { + opaquedir = NULL; + goto out_revert_creds; + } + } + + if (overwrite) { + if (old_opaque) { + if (new->d_inode || !new_opaque) { + /* Whiteout source */ + flags |= RENAME_WHITEOUT; + } else { + /* Switch whiteouts */ + flags |= RENAME_EXCHANGE; + } + } else if (is_dir && !new->d_inode && new_opaque) { + flags |= RENAME_EXCHANGE; + cleanup_whiteout = true; + } + } + + old_upperdir = ovl_dentry_upper(old->d_parent); + new_upperdir = ovl_dentry_upper(new->d_parent); + + trap = lock_rename(new_upperdir, old_upperdir); + + olddentry = ovl_dentry_upper(old); + newdentry = ovl_dentry_upper(new); + if (newdentry) { + if (opaquedir) { + newdentry = opaquedir; + opaquedir = NULL; + } else { + dget(newdentry); + } + } else { + new_create = true; + newdentry = lookup_one_len(new->d_name.name, new_upperdir, + new->d_name.len); + err = PTR_ERR(newdentry); + if (IS_ERR(newdentry)) + goto out_unlock; + } + + err = -ESTALE; + if (olddentry->d_parent != old_upperdir) + goto out_dput; + if (newdentry->d_parent != new_upperdir) + goto out_dput; + if (olddentry == trap) + goto out_dput; + if (newdentry == trap) + goto out_dput; + + if (is_dir && !old_opaque && new_opaque) { + err = ovl_set_opaque(olddentry); + if (err) + goto out_dput; + } + if (!overwrite && new_is_dir && old_opaque && !new_opaque) { + err = ovl_set_opaque(newdentry); + if (err) + goto out_dput; + } + + if (old_opaque || new_opaque) { + err = ovl_do_rename(old_upperdir->d_inode, olddentry, + new_upperdir->d_inode, newdentry, + flags); + } else { + /* No debug for the plain case */ + BUG_ON(flags & ~RENAME_EXCHANGE); + err = vfs_rename(old_upperdir->d_inode, olddentry, + new_upperdir->d_inode, newdentry, + NULL, flags); + } + + if (err) { + if (is_dir && !old_opaque && new_opaque) + ovl_remove_opaque(olddentry); + if (!overwrite && new_is_dir && old_opaque && !new_opaque) + ovl_remove_opaque(newdentry); + goto out_dput; + } + + if (is_dir && old_opaque && !new_opaque) + ovl_remove_opaque(olddentry); + if (!overwrite && new_is_dir && !old_opaque && new_opaque) + ovl_remove_opaque(newdentry); + + if (old_opaque != new_opaque) { + ovl_dentry_set_opaque(old, new_opaque); + if (!overwrite) + ovl_dentry_set_opaque(new, old_opaque); + } + + if (cleanup_whiteout) + ovl_cleanup(old_upperdir->d_inode, newdentry); + + ovl_dentry_version_inc(old->d_parent); + ovl_dentry_version_inc(new->d_parent); + +out_dput: + dput(newdentry); +out_unlock: + unlock_rename(new_upperdir, old_upperdir); +out_revert_creds: + if (old_opaque || new_opaque) { + revert_creds(old_cred); + put_cred(override_cred); + } +out_drop_write: + ovl_drop_write(old); +out: + dput(opaquedir); + return err; +} + +const struct inode_operations ovl_dir_inode_operations = { + .lookup = ovl_lookup, + .mkdir = ovl_mkdir, + .symlink = ovl_symlink, + .unlink = ovl_unlink, + .rmdir = ovl_rmdir, + .rename2 = ovl_rename2, + .link = ovl_link, + .setattr = ovl_setattr, + .create = ovl_create, + .mknod = ovl_mknod, + .permission = ovl_permission, + .getattr = ovl_dir_getattr, + .setxattr = ovl_setxattr, + .getxattr = ovl_getxattr, + .listxattr = ovl_listxattr, + .removexattr = ovl_removexattr, +}; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c new file mode 100644 index 000000000000..04f124884687 --- /dev/null +++ b/fs/overlayfs/inode.c @@ -0,0 +1,436 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/xattr.h> +#include "overlayfs.h" + +static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, + bool no_data) +{ + int err; + struct dentry *parent; + struct kstat stat; + struct path lowerpath; + + parent = dget_parent(dentry); + err = ovl_copy_up(parent); + if (err) + goto out_dput_parent; + + ovl_path_lower(dentry, &lowerpath); + err = vfs_getattr(&lowerpath, &stat); + if (err) + goto out_dput_parent; + + if (no_data) + stat.size = 0; + + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); + +out_dput_parent: + dput(parent); + return err; +} + +int ovl_setattr(struct dentry *dentry, struct iattr *attr) +{ + int err; + struct dentry *upperdentry; + + err = ovl_want_write(dentry); + if (err) + goto out; + + upperdentry = ovl_dentry_upper(dentry); + if (upperdentry) { + mutex_lock(&upperdentry->d_inode->i_mutex); + err = notify_change(upperdentry, attr, NULL); + mutex_unlock(&upperdentry->d_inode->i_mutex); + } else { + err = ovl_copy_up_last(dentry, attr, false); + } + ovl_drop_write(dentry); +out: + return err; +} + +static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat) +{ + struct path realpath; + + ovl_path_real(dentry, &realpath); + return vfs_getattr(&realpath, stat); +} + +int ovl_permission(struct inode *inode, int mask) +{ + struct ovl_entry *oe; + struct dentry *alias = NULL; + struct inode *realinode; + struct dentry *realdentry; + bool is_upper; + int err; + + if (S_ISDIR(inode->i_mode)) { + oe = inode->i_private; + } else if (mask & MAY_NOT_BLOCK) { + return -ECHILD; + } else { + /* + * For non-directories find an alias and get the info + * from there. + */ + alias = d_find_any_alias(inode); + if (WARN_ON(!alias)) + return -ENOENT; + + oe = alias->d_fsdata; + } + + realdentry = ovl_entry_real(oe, &is_upper); + + /* Careful in RCU walk mode */ + realinode = ACCESS_ONCE(realdentry->d_inode); + if (!realinode) { + WARN_ON(!(mask & MAY_NOT_BLOCK)); + err = -ENOENT; + goto out_dput; + } + + if (mask & MAY_WRITE) { + umode_t mode = realinode->i_mode; + + /* + * Writes will always be redirected to upper layer, so + * ignore lower layer being read-only. + * + * If the overlay itself is read-only then proceed + * with the permission check, don't return EROFS. + * This will only happen if this is the lower layer of + * another overlayfs. + * + * If upper fs becomes read-only after the overlay was + * constructed return EROFS to prevent modification of + * upper layer. + */ + err = -EROFS; + if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + goto out_dput; + } + + err = __inode_permission(realinode, mask); +out_dput: + dput(alias); + return err; +} + + +struct ovl_link_data { + struct dentry *realdentry; + void *cookie; +}; + +static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + void *ret; + struct dentry *realdentry; + struct inode *realinode; + + realdentry = ovl_dentry_real(dentry); + realinode = realdentry->d_inode; + + if (WARN_ON(!realinode->i_op->follow_link)) + return ERR_PTR(-EPERM); + + ret = realinode->i_op->follow_link(realdentry, nd); + if (IS_ERR(ret)) + return ret; + + if (realinode->i_op->put_link) { + struct ovl_link_data *data; + + data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); + if (!data) { + realinode->i_op->put_link(realdentry, nd, ret); + return ERR_PTR(-ENOMEM); + } + data->realdentry = realdentry; + data->cookie = ret; + + return data; + } else { + return NULL; + } +} + +static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) +{ + struct inode *realinode; + struct ovl_link_data *data = c; + + if (!data) + return; + + realinode = data->realdentry->d_inode; + realinode->i_op->put_link(data->realdentry, nd, data->cookie); + kfree(data); +} + +static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +{ + struct path realpath; + struct inode *realinode; + + ovl_path_real(dentry, &realpath); + realinode = realpath.dentry->d_inode; + + if (!realinode->i_op->readlink) + return -EINVAL; + + touch_atime(&realpath); + + return realinode->i_op->readlink(realpath.dentry, buf, bufsiz); +} + + +static bool ovl_is_private_xattr(const char *name) +{ + return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0; +} + +int ovl_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err; + struct dentry *upperdentry; + + err = ovl_want_write(dentry); + if (err) + goto out; + + err = -EPERM; + if (ovl_is_private_xattr(name)) + goto out_drop_write; + + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; + + upperdentry = ovl_dentry_upper(dentry); + err = vfs_setxattr(upperdentry, name, value, size, flags); + +out_drop_write: + ovl_drop_write(dentry); +out: + return err; +} + +static bool ovl_need_xattr_filter(struct dentry *dentry, + enum ovl_path_type type) +{ + if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER) + return S_ISDIR(dentry->d_inode->i_mode); + else + return false; +} + +ssize_t ovl_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size) +{ + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + return -ENODATA; + + return vfs_getxattr(realpath.dentry, name, value, size); +} + +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) +{ + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + ssize_t res; + int off; + + res = vfs_listxattr(realpath.dentry, list, size); + if (res <= 0 || size == 0) + return res; + + if (!ovl_need_xattr_filter(dentry, type)) + return res; + + /* filter out private xattrs */ + for (off = 0; off < res;) { + char *s = list + off; + size_t slen = strlen(s) + 1; + + BUG_ON(off + slen > res); + + if (ovl_is_private_xattr(s)) { + res -= slen; + memmove(s, s + slen, res - off); + } else { + off += slen; + } + } + + return res; +} + +int ovl_removexattr(struct dentry *dentry, const char *name) +{ + int err; + struct path realpath; + enum ovl_path_type type = ovl_path_real(dentry, &realpath); + + err = ovl_want_write(dentry); + if (err) + goto out; + + err = -ENODATA; + if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name)) + goto out_drop_write; + + if (!OVL_TYPE_UPPER(type)) { + err = vfs_getxattr(realpath.dentry, name, NULL, 0); + if (err < 0) + goto out_drop_write; + + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; + + ovl_path_upper(dentry, &realpath); + } + + err = vfs_removexattr(realpath.dentry, name); +out_drop_write: + ovl_drop_write(dentry); +out: + return err; +} + +static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type, + struct dentry *realdentry) +{ + if (OVL_TYPE_UPPER(type)) + return false; + + if (special_file(realdentry->d_inode->i_mode)) + return false; + + if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC)) + return false; + + return true; +} + +static int ovl_dentry_open(struct dentry *dentry, struct file *file, + const struct cred *cred) +{ + int err; + struct path realpath; + enum ovl_path_type type; + bool want_write = false; + + type = ovl_path_real(dentry, &realpath); + if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) { + want_write = true; + err = ovl_want_write(dentry); + if (err) + goto out; + + if (file->f_flags & O_TRUNC) + err = ovl_copy_up_last(dentry, NULL, true); + else + err = ovl_copy_up(dentry); + if (err) + goto out_drop_write; + + ovl_path_upper(dentry, &realpath); + } + + err = vfs_open(&realpath, file, cred); +out_drop_write: + if (want_write) + ovl_drop_write(dentry); +out: + return err; +} + +static const struct inode_operations ovl_file_inode_operations = { + .setattr = ovl_setattr, + .permission = ovl_permission, + .getattr = ovl_getattr, + .setxattr = ovl_setxattr, + .getxattr = ovl_getxattr, + .listxattr = ovl_listxattr, + .removexattr = ovl_removexattr, + .dentry_open = ovl_dentry_open, +}; + +static const struct inode_operations ovl_symlink_inode_operations = { + .setattr = ovl_setattr, + .follow_link = ovl_follow_link, + .put_link = ovl_put_link, + .readlink = ovl_readlink, + .getattr = ovl_getattr, + .setxattr = ovl_setxattr, + .getxattr = ovl_getxattr, + .listxattr = ovl_listxattr, + .removexattr = ovl_removexattr, +}; + +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, + struct ovl_entry *oe) +{ + struct inode *inode; + + inode = new_inode(sb); + if (!inode) + return NULL; + + mode &= S_IFMT; + + inode->i_ino = get_next_ino(); + inode->i_mode = mode; + inode->i_flags |= S_NOATIME | S_NOCMTIME; + + switch (mode) { + case S_IFDIR: + inode->i_private = oe; + inode->i_op = &ovl_dir_inode_operations; + inode->i_fop = &ovl_dir_operations; + break; + + case S_IFLNK: + inode->i_op = &ovl_symlink_inode_operations; + break; + + case S_IFREG: + case S_IFSOCK: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + inode->i_op = &ovl_file_inode_operations; + break; + + default: + WARN(1, "illegal file type: %i\n", mode); + iput(inode); + inode = NULL; + } + + return inode; +} diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h new file mode 100644 index 000000000000..17ac5afc9ffb --- /dev/null +++ b/fs/overlayfs/overlayfs.h @@ -0,0 +1,199 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/kernel.h> + +struct ovl_entry; + +enum ovl_path_type { + __OVL_PATH_PURE = (1 << 0), + __OVL_PATH_UPPER = (1 << 1), + __OVL_PATH_MERGE = (1 << 2), +}; + +#define OVL_TYPE_UPPER(type) ((type) & __OVL_PATH_UPPER) +#define OVL_TYPE_MERGE(type) ((type) & __OVL_PATH_MERGE) +#define OVL_TYPE_PURE_UPPER(type) ((type) & __OVL_PATH_PURE) +#define OVL_TYPE_MERGE_OR_LOWER(type) \ + (OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type)) + +#define OVL_XATTR_PRE_NAME "trusted.overlay." +#define OVL_XATTR_PRE_LEN 16 +#define OVL_XATTR_OPAQUE OVL_XATTR_PRE_NAME"opaque" + +static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry) +{ + int err = vfs_rmdir(dir, dentry); + pr_debug("rmdir(%pd2) = %i\n", dentry, err); + return err; +} + +static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry) +{ + int err = vfs_unlink(dir, dentry, NULL); + pr_debug("unlink(%pd2) = %i\n", dentry, err); + return err; +} + +static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *new_dentry, bool debug) +{ + int err = vfs_link(old_dentry, dir, new_dentry, NULL); + if (debug) { + pr_debug("link(%pd2, %pd2) = %i\n", + old_dentry, new_dentry, err); + } + return err; +} + +static inline int ovl_do_create(struct inode *dir, struct dentry *dentry, + umode_t mode, bool debug) +{ + int err = vfs_create(dir, dentry, mode, true); + if (debug) + pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err); + return err; +} + +static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry, + umode_t mode, bool debug) +{ + int err = vfs_mkdir(dir, dentry, mode); + if (debug) + pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err); + return err; +} + +static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry, + umode_t mode, dev_t dev, bool debug) +{ + int err = vfs_mknod(dir, dentry, mode, dev); + if (debug) { + pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", + dentry, mode, dev, err); + } + return err; +} + +static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry, + const char *oldname, bool debug) +{ + int err = vfs_symlink(dir, dentry, oldname); + if (debug) + pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err); + return err; +} + +static inline int ovl_do_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + int err = vfs_setxattr(dentry, name, value, size, flags); + pr_debug("setxattr(%pd2, \"%s\", \"%*s\", 0x%x) = %i\n", + dentry, name, (int) size, (char *) value, flags, err); + return err; +} + +static inline int ovl_do_removexattr(struct dentry *dentry, const char *name) +{ + int err = vfs_removexattr(dentry, name); + pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err); + return err; +} + +static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry, + struct inode *newdir, struct dentry *newdentry, + unsigned int flags) +{ + int err; + + pr_debug("rename2(%pd2, %pd2, 0x%x)\n", + olddentry, newdentry, flags); + + err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags); + + if (err) { + pr_debug("...rename2(%pd2, %pd2, ...) = %i\n", + olddentry, newdentry, err); + } + return err; +} + +static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry) +{ + int err = vfs_whiteout(dir, dentry); + pr_debug("whiteout(%pd2) = %i\n", dentry, err); + return err; +} + +enum ovl_path_type ovl_path_type(struct dentry *dentry); +u64 ovl_dentry_version_get(struct dentry *dentry); +void ovl_dentry_version_inc(struct dentry *dentry); +void ovl_path_upper(struct dentry *dentry, struct path *path); +void ovl_path_lower(struct dentry *dentry, struct path *path); +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path); +int ovl_path_next(int idx, struct dentry *dentry, struct path *path); +struct dentry *ovl_dentry_upper(struct dentry *dentry); +struct dentry *ovl_dentry_lower(struct dentry *dentry); +struct dentry *ovl_dentry_real(struct dentry *dentry); +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper); +struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry); +void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache); +struct dentry *ovl_workdir(struct dentry *dentry); +int ovl_want_write(struct dentry *dentry); +void ovl_drop_write(struct dentry *dentry); +bool ovl_dentry_is_opaque(struct dentry *dentry); +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque); +bool ovl_is_whiteout(struct dentry *dentry); +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry); +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags); +struct file *ovl_path_open(struct path *path, int flags); + +struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry, + struct kstat *stat, const char *link); + +/* readdir.c */ +extern const struct file_operations ovl_dir_operations; +int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list); +void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list); +void ovl_cache_free(struct list_head *list); + +/* inode.c */ +int ovl_setattr(struct dentry *dentry, struct iattr *attr); +int ovl_permission(struct inode *inode, int mask); +int ovl_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +ssize_t ovl_getxattr(struct dentry *dentry, const char *name, + void *value, size_t size); +ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); +int ovl_removexattr(struct dentry *dentry, const char *name); + +struct inode *ovl_new_inode(struct super_block *sb, umode_t mode, + struct ovl_entry *oe); +static inline void ovl_copyattr(struct inode *from, struct inode *to) +{ + to->i_uid = from->i_uid; + to->i_gid = from->i_gid; +} + +/* dir.c */ +extern const struct inode_operations ovl_dir_inode_operations; +struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry); +int ovl_create_real(struct inode *dir, struct dentry *newdentry, + struct kstat *stat, const char *link, + struct dentry *hardlink, bool debug); +void ovl_cleanup(struct inode *dir, struct dentry *dentry); + +/* copy_up.c */ +int ovl_copy_up(struct dentry *dentry); +int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, + struct path *lowerpath, struct kstat *stat, + struct iattr *attr); +int ovl_copy_xattr(struct dentry *old, struct dentry *new); +int ovl_set_attr(struct dentry *upper, struct kstat *stat); diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c new file mode 100644 index 000000000000..907870e81a72 --- /dev/null +++ b/fs/overlayfs/readdir.c @@ -0,0 +1,557 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/namei.h> +#include <linux/file.h> +#include <linux/xattr.h> +#include <linux/rbtree.h> +#include <linux/security.h> +#include <linux/cred.h> +#include "overlayfs.h" + +struct ovl_cache_entry { + unsigned int len; + unsigned int type; + u64 ino; + struct list_head l_node; + struct rb_node node; + bool is_whiteout; + char name[]; +}; + +struct ovl_dir_cache { + long refcount; + u64 version; + struct list_head entries; +}; + +struct ovl_readdir_data { + struct dir_context ctx; + bool is_merge; + struct rb_root root; + struct list_head *list; + struct list_head middle; + struct dentry *dir; + int count; + int err; +}; + +struct ovl_dir_file { + bool is_real; + bool is_upper; + struct ovl_dir_cache *cache; + struct list_head *cursor; + struct file *realfile; + struct file *upperfile; +}; + +static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n) +{ + return container_of(n, struct ovl_cache_entry, node); +} + +static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root, + const char *name, int len) +{ + struct rb_node *node = root->rb_node; + int cmp; + + while (node) { + struct ovl_cache_entry *p = ovl_cache_entry_from_node(node); + + cmp = strncmp(name, p->name, len); + if (cmp > 0) + node = p->node.rb_right; + else if (cmp < 0 || len < p->len) + node = p->node.rb_left; + else + return p; + } + + return NULL; +} + +static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir, + const char *name, int len, + u64 ino, unsigned int d_type) +{ + struct ovl_cache_entry *p; + size_t size = offsetof(struct ovl_cache_entry, name[len + 1]); + + p = kmalloc(size, GFP_KERNEL); + if (!p) + return NULL; + + memcpy(p->name, name, len); + p->name[len] = '\0'; + p->len = len; + p->type = d_type; + p->ino = ino; + p->is_whiteout = false; + + if (d_type == DT_CHR) { + struct dentry *dentry; + const struct cred *old_cred; + struct cred *override_cred; + + override_cred = prepare_creds(); + if (!override_cred) { + kfree(p); + return NULL; + } + + /* + * CAP_DAC_OVERRIDE for lookup + */ + cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); + old_cred = override_creds(override_cred); + + dentry = lookup_one_len(name, dir, len); + if (!IS_ERR(dentry)) { + p->is_whiteout = ovl_is_whiteout(dentry); + dput(dentry); + } + revert_creds(old_cred); + put_cred(override_cred); + } + return p; +} + +static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd, + const char *name, int len, u64 ino, + unsigned int d_type) +{ + struct rb_node **newp = &rdd->root.rb_node; + struct rb_node *parent = NULL; + struct ovl_cache_entry *p; + + while (*newp) { + int cmp; + struct ovl_cache_entry *tmp; + + parent = *newp; + tmp = ovl_cache_entry_from_node(*newp); + cmp = strncmp(name, tmp->name, len); + if (cmp > 0) + newp = &tmp->node.rb_right; + else if (cmp < 0 || len < tmp->len) + newp = &tmp->node.rb_left; + else + return 0; + } + + p = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type); + if (p == NULL) + return -ENOMEM; + + list_add_tail(&p->l_node, rdd->list); + rb_link_node(&p->node, parent, newp); + rb_insert_color(&p->node, &rdd->root); + + return 0; +} + +static int ovl_fill_lower(struct ovl_readdir_data *rdd, + const char *name, int namelen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct ovl_cache_entry *p; + + p = ovl_cache_entry_find(&rdd->root, name, namelen); + if (p) { + list_move_tail(&p->l_node, &rdd->middle); + } else { + p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type); + if (p == NULL) + rdd->err = -ENOMEM; + else + list_add_tail(&p->l_node, &rdd->middle); + } + + return rdd->err; +} + +void ovl_cache_free(struct list_head *list) +{ + struct ovl_cache_entry *p; + struct ovl_cache_entry *n; + + list_for_each_entry_safe(p, n, list, l_node) + kfree(p); + + INIT_LIST_HEAD(list); +} + +static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry) +{ + struct ovl_dir_cache *cache = od->cache; + + WARN_ON(cache->refcount <= 0); + cache->refcount--; + if (!cache->refcount) { + if (ovl_dir_cache(dentry) == cache) + ovl_set_dir_cache(dentry, NULL); + + ovl_cache_free(&cache->entries); + kfree(cache); + } +} + +static int ovl_fill_merge(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) +{ + struct ovl_readdir_data *rdd = + container_of(ctx, struct ovl_readdir_data, ctx); + + rdd->count++; + if (!rdd->is_merge) + return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type); + else + return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type); +} + +static inline int ovl_dir_read(struct path *realpath, + struct ovl_readdir_data *rdd) +{ + struct file *realfile; + int err; + + realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); + + rdd->dir = realpath->dentry; + rdd->ctx.pos = 0; + do { + rdd->count = 0; + rdd->err = 0; + err = iterate_dir(realfile, &rdd->ctx); + if (err >= 0) + err = rdd->err; + } while (!err && rdd->count); + fput(realfile); + + return err; +} + +static void ovl_dir_reset(struct file *file) +{ + struct ovl_dir_file *od = file->private_data; + struct ovl_dir_cache *cache = od->cache; + struct dentry *dentry = file->f_path.dentry; + enum ovl_path_type type = ovl_path_type(dentry); + + if (cache && ovl_dentry_version_get(dentry) != cache->version) { + ovl_cache_put(od, dentry); + od->cache = NULL; + od->cursor = NULL; + } + WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type)); + if (od->is_real && OVL_TYPE_MERGE(type)) + od->is_real = false; +} + +static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list) +{ + int err; + struct path realpath; + struct ovl_readdir_data rdd = { + .ctx.actor = ovl_fill_merge, + .list = list, + .root = RB_ROOT, + .is_merge = false, + }; + int idx, next; + + for (idx = 0; idx != -1; idx = next) { + next = ovl_path_next(idx, dentry, &realpath); + + if (next != -1) { + err = ovl_dir_read(&realpath, &rdd); + if (err) + break; + } else { + /* + * Insert lowest layer entries before upper ones, this + * allows offsets to be reasonably constant + */ + list_add(&rdd.middle, rdd.list); + rdd.is_merge = true; + err = ovl_dir_read(&realpath, &rdd); + list_del(&rdd.middle); + } + } + return err; +} + +static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos) +{ + struct list_head *p; + loff_t off = 0; + + list_for_each(p, &od->cache->entries) { + if (off >= pos) + break; + off++; + } + /* Cursor is safe since the cache is stable */ + od->cursor = p; +} + +static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry) +{ + int res; + struct ovl_dir_cache *cache; + + cache = ovl_dir_cache(dentry); + if (cache && ovl_dentry_version_get(dentry) == cache->version) { + cache->refcount++; + return cache; + } + ovl_set_dir_cache(dentry, NULL); + + cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL); + if (!cache) + return ERR_PTR(-ENOMEM); + + cache->refcount = 1; + INIT_LIST_HEAD(&cache->entries); + + res = ovl_dir_read_merged(dentry, &cache->entries); + if (res) { + ovl_cache_free(&cache->entries); + kfree(cache); + return ERR_PTR(res); + } + + cache->version = ovl_dentry_version_get(dentry); + ovl_set_dir_cache(dentry, cache); + + return cache; +} + +static int ovl_iterate(struct file *file, struct dir_context *ctx) +{ + struct ovl_dir_file *od = file->private_data; + struct dentry *dentry = file->f_path.dentry; + struct ovl_cache_entry *p; + + if (!ctx->pos) + ovl_dir_reset(file); + + if (od->is_real) + return iterate_dir(od->realfile, ctx); + + if (!od->cache) { + struct ovl_dir_cache *cache; + + cache = ovl_cache_get(dentry); + if (IS_ERR(cache)) + return PTR_ERR(cache); + + od->cache = cache; + ovl_seek_cursor(od, ctx->pos); + } + + while (od->cursor != &od->cache->entries) { + p = list_entry(od->cursor, struct ovl_cache_entry, l_node); + if (!p->is_whiteout) + if (!dir_emit(ctx, p->name, p->len, p->ino, p->type)) + break; + od->cursor = p->l_node.next; + ctx->pos++; + } + return 0; +} + +static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin) +{ + loff_t res; + struct ovl_dir_file *od = file->private_data; + + mutex_lock(&file_inode(file)->i_mutex); + if (!file->f_pos) + ovl_dir_reset(file); + + if (od->is_real) { + res = vfs_llseek(od->realfile, offset, origin); + file->f_pos = od->realfile->f_pos; + } else { + res = -EINVAL; + + switch (origin) { + case SEEK_CUR: + offset += file->f_pos; + break; + case SEEK_SET: + break; + default: + goto out_unlock; + } + if (offset < 0) + goto out_unlock; + + if (offset != file->f_pos) { + file->f_pos = offset; + if (od->cache) + ovl_seek_cursor(od, offset); + } + res = offset; + } +out_unlock: + mutex_unlock(&file_inode(file)->i_mutex); + + return res; +} + +static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end, + int datasync) +{ + struct ovl_dir_file *od = file->private_data; + struct dentry *dentry = file->f_path.dentry; + struct file *realfile = od->realfile; + + /* + * Need to check if we started out being a lower dir, but got copied up + */ + if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) { + struct inode *inode = file_inode(file); + + realfile = lockless_dereference(od->upperfile); + if (!realfile) { + struct path upperpath; + + ovl_path_upper(dentry, &upperpath); + realfile = ovl_path_open(&upperpath, O_RDONLY); + smp_mb__before_spinlock(); + mutex_lock(&inode->i_mutex); + if (!od->upperfile) { + if (IS_ERR(realfile)) { + mutex_unlock(&inode->i_mutex); + return PTR_ERR(realfile); + } + od->upperfile = realfile; + } else { + /* somebody has beaten us to it */ + if (!IS_ERR(realfile)) + fput(realfile); + realfile = od->upperfile; + } + mutex_unlock(&inode->i_mutex); + } + } + + return vfs_fsync_range(realfile, start, end, datasync); +} + +static int ovl_dir_release(struct inode *inode, struct file *file) +{ + struct ovl_dir_file *od = file->private_data; + + if (od->cache) { + mutex_lock(&inode->i_mutex); + ovl_cache_put(od, file->f_path.dentry); + mutex_unlock(&inode->i_mutex); + } + fput(od->realfile); + if (od->upperfile) + fput(od->upperfile); + kfree(od); + + return 0; +} + +static int ovl_dir_open(struct inode *inode, struct file *file) +{ + struct path realpath; + struct file *realfile; + struct ovl_dir_file *od; + enum ovl_path_type type; + + od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL); + if (!od) + return -ENOMEM; + + type = ovl_path_real(file->f_path.dentry, &realpath); + realfile = ovl_path_open(&realpath, file->f_flags); + if (IS_ERR(realfile)) { + kfree(od); + return PTR_ERR(realfile); + } + od->realfile = realfile; + od->is_real = !OVL_TYPE_MERGE(type); + od->is_upper = OVL_TYPE_UPPER(type); + file->private_data = od; + + return 0; +} + +const struct file_operations ovl_dir_operations = { + .read = generic_read_dir, + .open = ovl_dir_open, + .iterate = ovl_iterate, + .llseek = ovl_dir_llseek, + .fsync = ovl_dir_fsync, + .release = ovl_dir_release, +}; + +int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list) +{ + int err; + struct ovl_cache_entry *p; + + err = ovl_dir_read_merged(dentry, list); + if (err) + return err; + + err = 0; + + list_for_each_entry(p, list, l_node) { + if (p->is_whiteout) + continue; + + if (p->name[0] == '.') { + if (p->len == 1) + continue; + if (p->len == 2 && p->name[1] == '.') + continue; + } + err = -ENOTEMPTY; + break; + } + + return err; +} + +void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list) +{ + struct ovl_cache_entry *p; + + mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD); + list_for_each_entry(p, list, l_node) { + struct dentry *dentry; + + if (!p->is_whiteout) + continue; + + dentry = lookup_one_len(p->name, upper, p->len); + if (IS_ERR(dentry)) { + pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n", + upper->d_name.name, p->len, p->name, + (int) PTR_ERR(dentry)); + continue; + } + ovl_cleanup(upper->d_inode, dentry); + dput(dentry); + } + mutex_unlock(&upper->d_inode->i_mutex); +} diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c new file mode 100644 index 000000000000..5f0d1993e6e3 --- /dev/null +++ b/fs/overlayfs/super.c @@ -0,0 +1,1046 @@ +/* + * + * Copyright (C) 2011 Novell Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/namei.h> +#include <linux/xattr.h> +#include <linux/security.h> +#include <linux/mount.h> +#include <linux/slab.h> +#include <linux/parser.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/statfs.h> +#include <linux/seq_file.h> +#include "overlayfs.h" + +MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); +MODULE_DESCRIPTION("Overlay filesystem"); +MODULE_LICENSE("GPL"); + +#define OVERLAYFS_SUPER_MAGIC 0x794c7630 + +struct ovl_config { + char *lowerdir; + char *upperdir; + char *workdir; +}; + +/* private information held for overlayfs's superblock */ +struct ovl_fs { + struct vfsmount *upper_mnt; + unsigned numlower; + struct vfsmount **lower_mnt; + struct dentry *workdir; + long lower_namelen; + /* pathnames of lower and upper dirs, for show_options */ + struct ovl_config config; +}; + +struct ovl_dir_cache; + +/* private information held for every overlayfs dentry */ +struct ovl_entry { + struct dentry *__upperdentry; + struct ovl_dir_cache *cache; + union { + struct { + u64 version; + bool opaque; + }; + struct rcu_head rcu; + }; + unsigned numlower; + struct path lowerstack[]; +}; + +#define OVL_MAX_STACK 500 + +static struct dentry *__ovl_dentry_lower(struct ovl_entry *oe) +{ + return oe->numlower ? oe->lowerstack[0].dentry : NULL; +} + +enum ovl_path_type ovl_path_type(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + enum ovl_path_type type = 0; + + if (oe->__upperdentry) { + type = __OVL_PATH_UPPER; + + if (oe->numlower) { + if (S_ISDIR(dentry->d_inode->i_mode)) + type |= __OVL_PATH_MERGE; + } else if (!oe->opaque) { + type |= __OVL_PATH_PURE; + } + } else { + if (oe->numlower > 1) + type |= __OVL_PATH_MERGE; + } + return type; +} + +static struct dentry *ovl_upperdentry_dereference(struct ovl_entry *oe) +{ + return lockless_dereference(oe->__upperdentry); +} + +void ovl_path_upper(struct dentry *dentry, struct path *path) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct ovl_entry *oe = dentry->d_fsdata; + + path->mnt = ofs->upper_mnt; + path->dentry = ovl_upperdentry_dereference(oe); +} + +enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path) +{ + enum ovl_path_type type = ovl_path_type(dentry); + + if (!OVL_TYPE_UPPER(type)) + ovl_path_lower(dentry, path); + else + ovl_path_upper(dentry, path); + + return type; +} + +struct dentry *ovl_dentry_upper(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return ovl_upperdentry_dereference(oe); +} + +struct dentry *ovl_dentry_lower(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return __ovl_dentry_lower(oe); +} + +struct dentry *ovl_dentry_real(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + struct dentry *realdentry; + + realdentry = ovl_upperdentry_dereference(oe); + if (!realdentry) + realdentry = __ovl_dentry_lower(oe); + + return realdentry; +} + +struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper) +{ + struct dentry *realdentry; + + realdentry = ovl_upperdentry_dereference(oe); + if (realdentry) { + *is_upper = true; + } else { + realdentry = __ovl_dentry_lower(oe); + *is_upper = false; + } + return realdentry; +} + +struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + return oe->cache; +} + +void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + oe->cache = cache; +} + +void ovl_path_lower(struct dentry *dentry, struct path *path) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + *path = oe->numlower ? oe->lowerstack[0] : (struct path) { NULL, NULL }; +} + +int ovl_want_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + return mnt_want_write(ofs->upper_mnt); +} + +void ovl_drop_write(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + mnt_drop_write(ofs->upper_mnt); +} + +struct dentry *ovl_workdir(struct dentry *dentry) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + return ofs->workdir; +} + +bool ovl_dentry_is_opaque(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + return oe->opaque; +} + +void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque) +{ + struct ovl_entry *oe = dentry->d_fsdata; + oe->opaque = opaque; +} + +void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + WARN_ON(!mutex_is_locked(&upperdentry->d_parent->d_inode->i_mutex)); + WARN_ON(oe->__upperdentry); + BUG_ON(!upperdentry->d_inode); + /* + * Make sure upperdentry is consistent before making it visible to + * ovl_upperdentry_dereference(). + */ + smp_wmb(); + oe->__upperdentry = upperdentry; +} + +void ovl_dentry_version_inc(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + oe->version++; +} + +u64 ovl_dentry_version_get(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + WARN_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); + return oe->version; +} + +bool ovl_is_whiteout(struct dentry *dentry) +{ + struct inode *inode = dentry->d_inode; + + return inode && IS_WHITEOUT(inode); +} + +static bool ovl_is_opaquedir(struct dentry *dentry) +{ + int res; + char val; + struct inode *inode = dentry->d_inode; + + if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) + return false; + + res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + if (res == 1 && val == 'y') + return true; + + return false; +} + +static void ovl_dentry_release(struct dentry *dentry) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + if (oe) { + unsigned int i; + + dput(oe->__upperdentry); + for (i = 0; i < oe->numlower; i++) + dput(oe->lowerstack[i].dentry); + kfree_rcu(oe, rcu); + } +} + +static const struct dentry_operations ovl_dentry_operations = { + .d_release = ovl_dentry_release, +}; + +static struct ovl_entry *ovl_alloc_entry(unsigned int numlower) +{ + size_t size = offsetof(struct ovl_entry, lowerstack[numlower]); + struct ovl_entry *oe = kzalloc(size, GFP_KERNEL); + + if (oe) + oe->numlower = numlower; + + return oe; +} + +static inline struct dentry *ovl_lookup_real(struct dentry *dir, + struct qstr *name) +{ + struct dentry *dentry; + + mutex_lock(&dir->d_inode->i_mutex); + dentry = lookup_one_len(name->name, dir, name->len); + mutex_unlock(&dir->d_inode->i_mutex); + + if (IS_ERR(dentry)) { + if (PTR_ERR(dentry) == -ENOENT) + dentry = NULL; + } else if (!dentry->d_inode) { + dput(dentry); + dentry = NULL; + } + return dentry; +} + +/* + * Returns next layer in stack starting from top. + * Returns -1 if this is the last layer. + */ +int ovl_path_next(int idx, struct dentry *dentry, struct path *path) +{ + struct ovl_entry *oe = dentry->d_fsdata; + + BUG_ON(idx < 0); + if (idx == 0) { + ovl_path_upper(dentry, path); + if (path->dentry) + return oe->numlower ? 1 : -1; + idx++; + } + BUG_ON(idx > oe->numlower); + *path = oe->lowerstack[idx - 1]; + + return (idx < oe->numlower) ? idx + 1 : -1; +} + +struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct ovl_entry *oe; + struct ovl_entry *poe = dentry->d_parent->d_fsdata; + struct path *stack = NULL; + struct dentry *upperdir, *upperdentry = NULL; + unsigned int ctr = 0; + struct inode *inode = NULL; + bool upperopaque = false; + struct dentry *this, *prev = NULL; + unsigned int i; + int err; + + upperdir = ovl_upperdentry_dereference(poe); + if (upperdir) { + this = ovl_lookup_real(upperdir, &dentry->d_name); + err = PTR_ERR(this); + if (IS_ERR(this)) + goto out; + + if (this) { + if (ovl_is_whiteout(this)) { + dput(this); + this = NULL; + upperopaque = true; + } else if (poe->numlower && ovl_is_opaquedir(this)) { + upperopaque = true; + } + } + upperdentry = prev = this; + } + + if (!upperopaque && poe->numlower) { + err = -ENOMEM; + stack = kcalloc(poe->numlower, sizeof(struct path), GFP_KERNEL); + if (!stack) + goto out_put_upper; + } + + for (i = 0; !upperopaque && i < poe->numlower; i++) { + bool opaque = false; + struct path lowerpath = poe->lowerstack[i]; + + this = ovl_lookup_real(lowerpath.dentry, &dentry->d_name); + err = PTR_ERR(this); + if (IS_ERR(this)) { + /* + * If it's positive, then treat ENAMETOOLONG as ENOENT. + */ + if (err == -ENAMETOOLONG && (upperdentry || ctr)) + continue; + goto out_put; + } + if (!this) + continue; + if (ovl_is_whiteout(this)) { + dput(this); + break; + } + /* + * Only makes sense to check opaque dir if this is not the + * lowermost layer. + */ + if (i < poe->numlower - 1 && ovl_is_opaquedir(this)) + opaque = true; + + if (prev && (!S_ISDIR(prev->d_inode->i_mode) || + !S_ISDIR(this->d_inode->i_mode))) { + /* + * FIXME: check for upper-opaqueness maybe better done + * in remove code. + */ + if (prev == upperdentry) + upperopaque = true; + dput(this); + break; + } + /* + * If this is a non-directory then stop here. + */ + if (!S_ISDIR(this->d_inode->i_mode)) + opaque = true; + + stack[ctr].dentry = this; + stack[ctr].mnt = lowerpath.mnt; + ctr++; + prev = this; + if (opaque) + break; + } + + oe = ovl_alloc_entry(ctr); + err = -ENOMEM; + if (!oe) + goto out_put; + + if (upperdentry || ctr) { + struct dentry *realdentry; + + realdentry = upperdentry ? upperdentry : stack[0].dentry; + + err = -ENOMEM; + inode = ovl_new_inode(dentry->d_sb, realdentry->d_inode->i_mode, + oe); + if (!inode) + goto out_free_oe; + ovl_copyattr(realdentry->d_inode, inode); + } + + oe->opaque = upperopaque; + oe->__upperdentry = upperdentry; + memcpy(oe->lowerstack, stack, sizeof(struct path) * ctr); + kfree(stack); + dentry->d_fsdata = oe; + d_add(dentry, inode); + + return NULL; + +out_free_oe: + kfree(oe); +out_put: + for (i = 0; i < ctr; i++) + dput(stack[i].dentry); + kfree(stack); +out_put_upper: + dput(upperdentry); +out: + return ERR_PTR(err); +} + +struct file *ovl_path_open(struct path *path, int flags) +{ + return dentry_open(path, flags, current_cred()); +} + +static void ovl_put_super(struct super_block *sb) +{ + struct ovl_fs *ufs = sb->s_fs_info; + unsigned i; + + dput(ufs->workdir); + mntput(ufs->upper_mnt); + for (i = 0; i < ufs->numlower; i++) + mntput(ufs->lower_mnt[i]); + + kfree(ufs->config.lowerdir); + kfree(ufs->config.upperdir); + kfree(ufs->config.workdir); + kfree(ufs); +} + +/** + * ovl_statfs + * @sb: The overlayfs super block + * @buf: The struct kstatfs to fill in with stats + * + * Get the filesystem statistics. As writes always target the upper layer + * filesystem pass the statfs to the upper filesystem (if it exists) + */ +static int ovl_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct ovl_fs *ofs = dentry->d_sb->s_fs_info; + struct dentry *root_dentry = dentry->d_sb->s_root; + struct path path; + int err; + + ovl_path_real(root_dentry, &path); + + err = vfs_statfs(&path, buf); + if (!err) { + buf->f_namelen = max(buf->f_namelen, ofs->lower_namelen); + buf->f_type = OVERLAYFS_SUPER_MAGIC; + } + + return err; +} + +/** + * ovl_show_options + * + * Prints the mount options for a given superblock. + * Returns zero; does not fail. + */ +static int ovl_show_options(struct seq_file *m, struct dentry *dentry) +{ + struct super_block *sb = dentry->d_sb; + struct ovl_fs *ufs = sb->s_fs_info; + + seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir); + if (ufs->config.upperdir) { + seq_printf(m, ",upperdir=%s", ufs->config.upperdir); + seq_printf(m, ",workdir=%s", ufs->config.workdir); + } + return 0; +} + +static int ovl_remount(struct super_block *sb, int *flags, char *data) +{ + struct ovl_fs *ufs = sb->s_fs_info; + + if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) + return -EROFS; + + return 0; +} + +static const struct super_operations ovl_super_operations = { + .put_super = ovl_put_super, + .statfs = ovl_statfs, + .show_options = ovl_show_options, + .remount_fs = ovl_remount, +}; + +enum { + OPT_LOWERDIR, + OPT_UPPERDIR, + OPT_WORKDIR, + OPT_ERR, +}; + +static const match_table_t ovl_tokens = { + {OPT_LOWERDIR, "lowerdir=%s"}, + {OPT_UPPERDIR, "upperdir=%s"}, + {OPT_WORKDIR, "workdir=%s"}, + {OPT_ERR, NULL} +}; + +static char *ovl_next_opt(char **s) +{ + char *sbegin = *s; + char *p; + + if (sbegin == NULL) + return NULL; + + for (p = sbegin; *p; p++) { + if (*p == '\\') { + p++; + if (!*p) + break; + } else if (*p == ',') { + *p = '\0'; + *s = p + 1; + return sbegin; + } + } + *s = NULL; + return sbegin; +} + +static int ovl_parse_opt(char *opt, struct ovl_config *config) +{ + char *p; + + while ((p = ovl_next_opt(&opt)) != NULL) { + int token; + substring_t args[MAX_OPT_ARGS]; + + if (!*p) + continue; + + token = match_token(p, ovl_tokens, args); + switch (token) { + case OPT_UPPERDIR: + kfree(config->upperdir); + config->upperdir = match_strdup(&args[0]); + if (!config->upperdir) + return -ENOMEM; + break; + + case OPT_LOWERDIR: + kfree(config->lowerdir); + config->lowerdir = match_strdup(&args[0]); + if (!config->lowerdir) + return -ENOMEM; + break; + + case OPT_WORKDIR: + kfree(config->workdir); + config->workdir = match_strdup(&args[0]); + if (!config->workdir) + return -ENOMEM; + break; + + default: + pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p); + return -EINVAL; + } + } + + /* Workdir is useless in non-upper mount */ + if (!config->upperdir && config->workdir) { + pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n", + config->workdir); + kfree(config->workdir); + config->workdir = NULL; + } + + return 0; +} + +#define OVL_WORKDIR_NAME "work" + +static struct dentry *ovl_workdir_create(struct vfsmount *mnt, + struct dentry *dentry) +{ + struct inode *dir = dentry->d_inode; + struct dentry *work; + int err; + bool retried = false; + + err = mnt_want_write(mnt); + if (err) + return ERR_PTR(err); + + mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT); +retry: + work = lookup_one_len(OVL_WORKDIR_NAME, dentry, + strlen(OVL_WORKDIR_NAME)); + + if (!IS_ERR(work)) { + struct kstat stat = { + .mode = S_IFDIR | 0, + }; + + if (work->d_inode) { + err = -EEXIST; + if (retried) + goto out_dput; + + retried = true; + ovl_cleanup(dir, work); + dput(work); + goto retry; + } + + err = ovl_create_real(dir, work, &stat, NULL, NULL, true); + if (err) + goto out_dput; + } +out_unlock: + mutex_unlock(&dir->i_mutex); + mnt_drop_write(mnt); + + return work; + +out_dput: + dput(work); + work = ERR_PTR(err); + goto out_unlock; +} + +static void ovl_unescape(char *s) +{ + char *d = s; + + for (;; s++, d++) { + if (*s == '\\') + s++; + *d = *s; + if (!*s) + break; + } +} + +static bool ovl_is_allowed_fs_type(struct dentry *root) +{ + const struct dentry_operations *dop = root->d_op; + + /* + * We don't support: + * - automount filesystems + * - filesystems with revalidate (FIXME for lower layer) + * - filesystems with case insensitive names + */ + if (dop && + (dop->d_manage || dop->d_automount || + dop->d_revalidate || dop->d_weak_revalidate || + dop->d_compare || dop->d_hash)) { + return false; + } + return true; +} + +static int ovl_mount_dir_noesc(const char *name, struct path *path) +{ + int err = -EINVAL; + + if (!*name) { + pr_err("overlayfs: empty lowerdir\n"); + goto out; + } + err = kern_path(name, LOOKUP_FOLLOW, path); + if (err) { + pr_err("overlayfs: failed to resolve '%s': %i\n", name, err); + goto out; + } + err = -EINVAL; + if (!ovl_is_allowed_fs_type(path->dentry)) { + pr_err("overlayfs: filesystem on '%s' not supported\n", name); + goto out_put; + } + if (!S_ISDIR(path->dentry->d_inode->i_mode)) { + pr_err("overlayfs: '%s' not a directory\n", name); + goto out_put; + } + return 0; + +out_put: + path_put(path); +out: + return err; +} + +static int ovl_mount_dir(const char *name, struct path *path) +{ + int err = -ENOMEM; + char *tmp = kstrdup(name, GFP_KERNEL); + + if (tmp) { + ovl_unescape(tmp); + err = ovl_mount_dir_noesc(tmp, path); + kfree(tmp); + } + return err; +} + +static int ovl_lower_dir(const char *name, struct path *path, long *namelen, + int *stack_depth) +{ + int err; + struct kstatfs statfs; + + err = ovl_mount_dir_noesc(name, path); + if (err) + goto out; + + err = vfs_statfs(path, &statfs); + if (err) { + pr_err("overlayfs: statfs failed on '%s'\n", name); + goto out_put; + } + *namelen = max(*namelen, statfs.f_namelen); + *stack_depth = max(*stack_depth, path->mnt->mnt_sb->s_stack_depth); + + return 0; + +out_put: + path_put(path); +out: + return err; +} + +/* Workdir should not be subdir of upperdir and vice versa */ +static bool ovl_workdir_ok(struct dentry *workdir, struct dentry *upperdir) +{ + bool ok = false; + + if (workdir != upperdir) { + ok = (lock_rename(workdir, upperdir) == NULL); + unlock_rename(workdir, upperdir); + } + return ok; +} + +static unsigned int ovl_split_lowerdirs(char *str) +{ + unsigned int ctr = 1; + char *s, *d; + + for (s = d = str;; s++, d++) { + if (*s == '\\') { + s++; + } else if (*s == ':') { + *d = '\0'; + ctr++; + continue; + } + *d = *s; + if (!*s) + break; + } + return ctr; +} + +static int ovl_fill_super(struct super_block *sb, void *data, int silent) +{ + struct path upperpath = { NULL, NULL }; + struct path workpath = { NULL, NULL }; + struct dentry *root_dentry; + struct ovl_entry *oe; + struct ovl_fs *ufs; + struct path *stack = NULL; + char *lowertmp; + char *lower; + unsigned int numlower; + unsigned int stacklen = 0; + unsigned int i; + int err; + + err = -ENOMEM; + ufs = kzalloc(sizeof(struct ovl_fs), GFP_KERNEL); + if (!ufs) + goto out; + + err = ovl_parse_opt((char *) data, &ufs->config); + if (err) + goto out_free_config; + + err = -EINVAL; + if (!ufs->config.lowerdir) { + pr_err("overlayfs: missing 'lowerdir'\n"); + goto out_free_config; + } + + sb->s_stack_depth = 0; + if (ufs->config.upperdir) { + if (!ufs->config.workdir) { + pr_err("overlayfs: missing 'workdir'\n"); + goto out_free_config; + } + + err = ovl_mount_dir(ufs->config.upperdir, &upperpath); + if (err) + goto out_free_config; + + /* Upper fs should not be r/o */ + if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) { + pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n"); + err = -EINVAL; + goto out_put_upperpath; + } + + err = ovl_mount_dir(ufs->config.workdir, &workpath); + if (err) + goto out_put_upperpath; + + err = -EINVAL; + if (upperpath.mnt != workpath.mnt) { + pr_err("overlayfs: workdir and upperdir must reside under the same mount\n"); + goto out_put_workpath; + } + if (!ovl_workdir_ok(workpath.dentry, upperpath.dentry)) { + pr_err("overlayfs: workdir and upperdir must be separate subtrees\n"); + goto out_put_workpath; + } + sb->s_stack_depth = upperpath.mnt->mnt_sb->s_stack_depth; + } + err = -ENOMEM; + lowertmp = kstrdup(ufs->config.lowerdir, GFP_KERNEL); + if (!lowertmp) + goto out_put_workpath; + + err = -EINVAL; + stacklen = ovl_split_lowerdirs(lowertmp); + if (stacklen > OVL_MAX_STACK) { + pr_err("overlayfs: too many lower directries, limit is %d\n", + OVL_MAX_STACK); + goto out_free_lowertmp; + } else if (!ufs->config.upperdir && stacklen == 1) { + pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n"); + goto out_free_lowertmp; + } + + stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL); + if (!stack) + goto out_free_lowertmp; + + lower = lowertmp; + for (numlower = 0; numlower < stacklen; numlower++) { + err = ovl_lower_dir(lower, &stack[numlower], + &ufs->lower_namelen, &sb->s_stack_depth); + if (err) + goto out_put_lowerpath; + + lower = strchr(lower, '\0') + 1; + } + + err = -EINVAL; + sb->s_stack_depth++; + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + pr_err("overlayfs: maximum fs stacking depth exceeded\n"); + goto out_put_lowerpath; + } + + if (ufs->config.upperdir) { + ufs->upper_mnt = clone_private_mount(&upperpath); + err = PTR_ERR(ufs->upper_mnt); + if (IS_ERR(ufs->upper_mnt)) { + pr_err("overlayfs: failed to clone upperpath\n"); + goto out_put_lowerpath; + } + + ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); + err = PTR_ERR(ufs->workdir); + if (IS_ERR(ufs->workdir)) { + pr_err("overlayfs: failed to create directory %s/%s\n", + ufs->config.workdir, OVL_WORKDIR_NAME); + goto out_put_upper_mnt; + } + } + + err = -ENOMEM; + ufs->lower_mnt = kcalloc(numlower, sizeof(struct vfsmount *), GFP_KERNEL); + if (ufs->lower_mnt == NULL) + goto out_put_workdir; + for (i = 0; i < numlower; i++) { + struct vfsmount *mnt = clone_private_mount(&stack[i]); + + err = PTR_ERR(mnt); + if (IS_ERR(mnt)) { + pr_err("overlayfs: failed to clone lowerpath\n"); + goto out_put_lower_mnt; + } + /* + * Make lower_mnt R/O. That way fchmod/fchown on lower file + * will fail instead of modifying lower fs. + */ + mnt->mnt_flags |= MNT_READONLY; + + ufs->lower_mnt[ufs->numlower] = mnt; + ufs->numlower++; + } + + /* If the upper fs is nonexistent, we mark overlayfs r/o too */ + if (!ufs->upper_mnt) + sb->s_flags |= MS_RDONLY; + + sb->s_d_op = &ovl_dentry_operations; + + err = -ENOMEM; + oe = ovl_alloc_entry(numlower); + if (!oe) + goto out_put_lower_mnt; + + root_dentry = d_make_root(ovl_new_inode(sb, S_IFDIR, oe)); + if (!root_dentry) + goto out_free_oe; + + mntput(upperpath.mnt); + for (i = 0; i < numlower; i++) + mntput(stack[i].mnt); + path_put(&workpath); + kfree(lowertmp); + + oe->__upperdentry = upperpath.dentry; + for (i = 0; i < numlower; i++) { + oe->lowerstack[i].dentry = stack[i].dentry; + oe->lowerstack[i].mnt = ufs->lower_mnt[i]; + } + + root_dentry->d_fsdata = oe; + + sb->s_magic = OVERLAYFS_SUPER_MAGIC; + sb->s_op = &ovl_super_operations; + sb->s_root = root_dentry; + sb->s_fs_info = ufs; + + return 0; + +out_free_oe: + kfree(oe); +out_put_lower_mnt: + for (i = 0; i < ufs->numlower; i++) + mntput(ufs->lower_mnt[i]); + kfree(ufs->lower_mnt); +out_put_workdir: + dput(ufs->workdir); +out_put_upper_mnt: + mntput(ufs->upper_mnt); +out_put_lowerpath: + for (i = 0; i < numlower; i++) + path_put(&stack[i]); + kfree(stack); +out_free_lowertmp: + kfree(lowertmp); +out_put_workpath: + path_put(&workpath); +out_put_upperpath: + path_put(&upperpath); +out_free_config: + kfree(ufs->config.lowerdir); + kfree(ufs->config.upperdir); + kfree(ufs->config.workdir); + kfree(ufs); +out: + return err; +} + +static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *raw_data) +{ + return mount_nodev(fs_type, flags, raw_data, ovl_fill_super); +} + +static struct file_system_type ovl_fs_type = { + .owner = THIS_MODULE, + .name = "overlay", + .mount = ovl_mount, + .kill_sb = kill_anon_super, +}; +MODULE_ALIAS_FS("overlay"); + +static int __init ovl_init(void) +{ + return register_filesystem(&ovl_fs_type); +} + +static void __exit ovl_exit(void) +{ + unregister_filesystem(&ovl_fs_type); +} + +module_init(ovl_init); +module_exit(ovl_exit); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 9ae46b87470d..89026095f2b5 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -146,7 +146,7 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl struct task_struct *task; void *ns; char name[50]; - int len = -EACCES; + int res = -EACCES; task = get_proc_task(inode); if (!task) @@ -155,24 +155,18 @@ static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int bufl if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; - len = -ENOENT; + res = -ENOENT; ns = ns_ops->get(task); if (!ns) goto out_put_task; snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); - len = strlen(name); - - if (len > buflen) - len = buflen; - if (copy_to_user(buffer, name, len)) - len = -EFAULT; - + res = readlink_copy(buffer, buflen, name); ns_ops->put(ns); out_put_task: put_task_struct(task); out: - return len; + return res; } static const struct inode_operations proc_ns_link_inode_operations = { diff --git a/fs/proc/self.c b/fs/proc/self.c index ffeb202ec942..4348bb8907c2 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -16,7 +16,7 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, if (!tgid) return -ENOENT; sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); + return readlink_copy(buffer, buflen, tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index c4b2646b6d7c..c2546717fc2b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1227,6 +1227,9 @@ out: static int pagemap_open(struct inode *inode, struct file *file) { + /* do not disclose physical addresses: attack vector */ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about " "to stop being page-shift some time soon. See the " "linux/Documentation/vm/pagemap.txt for details.\n"); diff --git a/fs/readdir.c b/fs/readdir.c index 5b53d995cae6..e21af53ec24e 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -72,10 +72,11 @@ struct readdir_callback { int result; }; -static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int fillonedir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { - struct readdir_callback *buf = (struct readdir_callback *) __buf; + struct readdir_callback *buf = + container_of(ctx, struct readdir_callback, ctx); struct old_linux_dirent __user * dirent; unsigned long d_ino; @@ -146,11 +147,12 @@ struct getdents_callback { int error; }; -static int filldir(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int filldir(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent __user * dirent; - struct getdents_callback * buf = (struct getdents_callback *) __buf; + struct getdents_callback *buf = + container_of(ctx, struct getdents_callback, ctx); unsigned long d_ino; int reclen = ALIGN(offsetof(struct linux_dirent, d_name) + namlen + 2, sizeof(long)); @@ -230,11 +232,12 @@ struct getdents_callback64 { int error; }; -static int filldir64(void * __buf, const char * name, int namlen, loff_t offset, - u64 ino, unsigned int d_type) +static int filldir64(struct dir_context *ctx, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) { struct linux_dirent64 __user *dirent; - struct getdents_callback64 * buf = (struct getdents_callback64 *) __buf; + struct getdents_callback64 *buf = + container_of(ctx, struct getdents_callback64, ctx); int reclen = ALIGN(offsetof(struct linux_dirent64, d_name) + namlen + 1, sizeof(u64)); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 5cdfbd638b5c..d8b7acfd5066 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -177,10 +177,11 @@ struct reiserfs_dentry_buf { }; static int -fill_with_dentries(void *buf, const char *name, int namelen, loff_t offset, - u64 ino, unsigned int d_type) +fill_with_dentries(struct dir_context *ctx, const char *name, int namelen, + loff_t offset, u64 ino, unsigned int d_type) { - struct reiserfs_dentry_buf *dbuf = buf; + struct reiserfs_dentry_buf *dbuf = + container_of(ctx, struct reiserfs_dentry_buf, ctx); struct dentry *dentry; WARN_ON_ONCE(!mutex_is_locked(&dbuf->xadir->d_inode->i_mutex)); @@ -794,10 +795,12 @@ struct listxattr_buf { struct dentry *dentry; }; -static int listxattr_filler(void *buf, const char *name, int namelen, - loff_t offset, u64 ino, unsigned int d_type) +static int listxattr_filler(struct dir_context *ctx, const char *name, + int namelen, loff_t offset, u64 ino, + unsigned int d_type) { - struct listxattr_buf *b = (struct listxattr_buf *)buf; + struct listxattr_buf *b = + container_of(ctx, struct listxattr_buf, ctx); size_t size; if (name[0] != '.' || (namelen != 1 && (name[1] != '.' || namelen != 2))) { diff --git a/fs/splice.c b/fs/splice.c index 12028fa41def..ffb92b9d34ba 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1327,6 +1327,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, return ret; } +EXPORT_SYMBOL(do_splice_direct); static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, struct pipe_inode_info *opipe, diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 78e62cc471c5..6152cbe353e8 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -271,32 +271,6 @@ xfs_open_by_handle( return error; } -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( - char __user *buffer, - int buflen, - const char *link) -{ - int len; - - len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; - out: - return len; -} - - int xfs_readlink_by_handle( struct file *parfilp, @@ -334,7 +308,7 @@ xfs_readlink_by_handle( error = -xfs_readlink(XFS_I(dentry->d_inode), link); if (error) goto out_kfree; - error = do_readlink(hreq->ohandle, olen, link); + error = readlink_copy(hreq->ohandle, olen, link); if (error) goto out_kfree; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bbc3a6c88fce..33fd7ff53a77 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -180,7 +180,9 @@ enum rq_flag_bits { __REQ_ELVPRIV, /* elevator private data attached */ __REQ_FAILED, /* set if the request failed */ __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_PREEMPT, /* set for "ide_preempt" requests and also + for requests for which the SCSI "quiesce" + state must be ignored. */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ __REQ_FLUSH_SEQ, /* request for flush sequence */ diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 50fcbb0ac4e7..d1338170b6b9 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -69,7 +69,6 @@ struct cpuidle_device { unsigned int cpu; int last_residency; - int state_count; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3b50cac7ccb3..3b9bfdb83ba6 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -308,6 +308,7 @@ extern void dentry_update_name_case(struct dentry *, struct qstr *); /* used for rename() and baskets */ extern void d_move(struct dentry *, struct dentry *); +extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); /* appendix may either be NULL or be used for transname suffixes */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 23b2a35d712e..e1cee8b12d1a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -218,6 +218,13 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define ATTR_TIMES_SET (1 << 16) /* + * Whiteout is represented by a char device. The following constants define the + * mode and device number to use. + */ +#define WHITEOUT_MODE 0 +#define WHITEOUT_DEV 0 + +/* * This is the Inode Attributes structure, used for notify_change(). It * uses the above definitions as flags, to know which values have changed. * Also, in this manner, a Filesystem can look at only the values it cares @@ -249,6 +256,12 @@ struct iattr { */ #include <linux/quota.h> +/* + * Maximum number of layers of fs stack. Needs to be limited to + * prevent kernel stack overflow + */ +#define FILESYSTEM_MAX_STACK_DEPTH 2 + /** * enum positive_aop_returns - aop return codes with specific semantics * @@ -628,11 +641,13 @@ static inline int inode_unhashed(struct inode *inode) * 2: child/target * 3: xattr * 4: second non-directory - * The last is for certain operations (such as rename) which lock two + * 5: second parent (when locking independent directories in rename) + * + * I_MUTEX_NONDIR2 is for certain operations (such as rename) which lock two * non-directories at once. * * The locking order between these classes is - * parent -> child -> normal -> xattr -> second non-directory + * parent[2] -> child -> grandchild -> normal -> xattr -> second non-directory */ enum inode_i_mutex_lock_class { @@ -640,7 +655,8 @@ enum inode_i_mutex_lock_class I_MUTEX_PARENT, I_MUTEX_CHILD, I_MUTEX_XATTR, - I_MUTEX_NONDIR2 + I_MUTEX_NONDIR2, + I_MUTEX_PARENT2, }; void lock_two_nondirectories(struct inode *, struct inode*); @@ -1329,6 +1345,11 @@ struct super_block { struct list_lru s_dentry_lru ____cacheline_aligned_in_smp; struct list_lru s_inode_lru ____cacheline_aligned_in_smp; struct rcu_head rcu; + + /* + * Indicates how deep in a filesystem stack this SB is + */ + int s_stack_depth; }; extern struct timespec current_fs_time(struct super_block *sb); @@ -1460,7 +1481,8 @@ extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *, struct inode **); -extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **); +extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int); +extern int vfs_whiteout(struct inode *, struct dentry *); /* * VFS dentry helper functions. @@ -1508,7 +1530,10 @@ int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. */ -typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); +struct dir_context; +typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, + unsigned); + struct dir_context { const filldir_t actor; loff_t pos; @@ -1571,6 +1596,8 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); + int (*rename2) (struct inode *, struct dentry *, + struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1585,6 +1612,9 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); + + /* WARNING: probably going away soon, do not use! */ + int (*dentry_open)(struct dentry *, struct file *, const struct cred *); } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, @@ -1682,6 +1712,9 @@ struct super_operations { #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) +#define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ + (inode)->i_rdev == WHITEOUT_DEV) + /* * Inode state bits. Protected by inode->i_lock * @@ -2079,6 +2112,7 @@ extern struct file *file_open_name(struct filename *, int, umode_t); extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); +extern int vfs_open(const struct path *, struct file *, const struct cred *); extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); @@ -2292,7 +2326,9 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); +extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); +extern int __check_sticky(struct inode *dir, struct inode *inode); static inline bool execute_ok(struct inode *inode) { @@ -2500,6 +2536,9 @@ extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); +extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, + loff_t *opos, size_t len, unsigned int flags); + extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); @@ -2564,7 +2603,7 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int vfs_readlink(struct dentry *, char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); @@ -2777,6 +2816,14 @@ static inline int is_sxid(umode_t mode) return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } +static inline int check_sticky(struct inode *dir, struct inode *inode) +{ + if (!(dir->i_mode & S_ISVTX)) + return 0; + + return __check_sticky(dir, inode); +} + static inline void inode_has_no_xattr(struct inode *inode) { if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index b0c7bf8f2bd1..e645bb5c2b96 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -467,8 +467,9 @@ extern int schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta, const enum hrtimer_mode mode, int clock); extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); -/* Called from the periodic timer tick */ +/* Soft interrupt function to run the hrtimer queues: */ extern void hrtimer_run_queues(void); +extern void hrtimer_run_pending(void); /* Bootup initialization: */ extern void __init hrtimers_init(void); diff --git a/include/linux/mount.h b/include/linux/mount.h index b0c1e6574e7f..fff78cb99fa1 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -81,6 +81,9 @@ extern void mnt_pin(struct vfsmount *mnt); extern void mnt_unpin(struct vfsmount *mnt); extern int __mnt_is_readonly(struct vfsmount *mnt); +struct path; +extern struct vfsmount *clone_private_mount(struct path *path); + struct file_system_type; extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, int flags, const char *name, diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index fbfdb9d8d3a7..039dd5df0ef7 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -87,9 +87,21 @@ static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) #ifdef CONFIG_NETPOLL static inline bool netpoll_rx_on(struct sk_buff *skb) { - struct netpoll_info *npinfo = rcu_dereference_bh(skb->dev->npinfo); + struct netpoll_info *npinfo; + bool ret; + +#ifdef CONFIG_PREEMPT_RT_FULL + rcu_read_lock_bh(); +#endif + + npinfo = rcu_dereference_bh(skb->dev->npinfo); + ret = npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags); + +#ifdef CONFIG_PREEMPT_RT_FULL + rcu_read_unlock_bh(); +#endif - return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags); + return ret; } static inline bool netpoll_rx(struct sk_buff *skb) diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 5954ee952601..6c990b621d36 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -573,6 +573,21 @@ static inline void rcu_preempt_sleep_check(void) #define RCU_INITIALIZER(v) (typeof(*(v)) __force __rcu *)(v) /** + * lockless_dereference() - safely load a pointer for later dereference + * @p: The pointer to load + * + * Similar to rcu_dereference(), but for situations where the pointed-to + * object's lifetime is managed by something other than RCU. That + * "something other" might be reference counting or simple immortality. + */ +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + +/** * rcu_assign_pointer() - assign to RCU-protected pointer * @p: pointer to assign to * @v: value to assign (publish) diff --git a/include/linux/rtmutex.h b/include/linux/rtmutex.h index edb77fd15803..d5a04ea47a13 100644 --- a/include/linux/rtmutex.h +++ b/include/linux/rtmutex.h @@ -105,12 +105,10 @@ extern void __rt_mutex_init(struct rt_mutex *lock, const char *name); extern void rt_mutex_destroy(struct rt_mutex *lock); extern void rt_mutex_lock(struct rt_mutex *lock); -extern int rt_mutex_lock_interruptible(struct rt_mutex *lock, - int detect_deadlock); -extern int rt_mutex_lock_killable(struct rt_mutex *lock, int detect_deadlock); +extern int rt_mutex_lock_interruptible(struct rt_mutex *lock); +extern int rt_mutex_lock_killable(struct rt_mutex *lock); extern int rt_mutex_timed_lock(struct rt_mutex *lock, - struct hrtimer_sleeper *timeout, - int detect_deadlock); + struct hrtimer_sleeper *timeout); extern int rt_mutex_trylock(struct rt_mutex *lock); diff --git a/include/linux/rwsem_rt.h b/include/linux/rwsem_rt.h index 0065b08fbb7a..924c2d274ab5 100644 --- a/include/linux/rwsem_rt.h +++ b/include/linux/rwsem_rt.h @@ -20,6 +20,7 @@ struct rw_semaphore { struct rt_mutex lock; + int read_depth; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif diff --git a/include/linux/security.h b/include/linux/security.h index 2fc42d191f79..6478ce3252c7 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1793,7 +1793,8 @@ int security_inode_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) int security_inode_rmdir(struct inode *dir, struct dentry *dentry); int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev); int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry); + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags); int security_inode_readlink(struct dentry *dentry); int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd); int security_inode_permission(struct inode *inode, int mask); @@ -2161,7 +2162,8 @@ static inline int security_inode_mknod(struct inode *dir, static inline int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, - struct dentry *new_dentry) + struct dentry *new_dentry, + unsigned int flags) { return 0; } @@ -2955,7 +2957,8 @@ int security_path_symlink(struct path *dir, struct dentry *dentry, int security_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry); int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry); + struct path *new_dir, struct dentry *new_dentry, + unsigned int flags); int security_path_chmod(struct path *path, umode_t mode); int security_path_chown(struct path *path, kuid_t uid, kgid_t gid); int security_path_chroot(struct path *path); @@ -3003,7 +3006,8 @@ static inline int security_path_link(struct dentry *old_dentry, static inline int security_path_rename(struct path *old_dir, struct dentry *old_dentry, struct path *new_dir, - struct dentry *new_dentry) + struct dentry *new_dentry, + unsigned int flags) { return 0; } diff --git a/include/linux/work-simple.h b/include/linux/work-simple.h new file mode 100644 index 000000000000..f175fa9a6016 --- /dev/null +++ b/include/linux/work-simple.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_SWORK_H +#define _LINUX_SWORK_H + +#include <linux/list.h> + +struct swork_event { + struct list_head item; + unsigned long flags; + void (*func)(struct swork_event *); +}; + +static inline void INIT_SWORK(struct swork_event *event, + void (*func)(struct swork_event *)) +{ + event->flags = 0; + event->func = func; +} + +bool swork_queue(struct swork_event *sev); + +int swork_get(void); +void swork_put(void); + +#endif /* _LINUX_SWORK_H */ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 6c62cfa25f1a..bc9d2c2ddf87 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -71,7 +71,8 @@ enum { /* data contains off-queue information when !WORK_STRUCT_PWQ */ WORK_OFFQ_FLAG_BASE = WORK_STRUCT_COLOR_SHIFT, - WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE), + __WORK_OFFQ_CANCELING = WORK_OFFQ_FLAG_BASE, + WORK_OFFQ_CANCELING = (1 << __WORK_OFFQ_CANCELING), /* * When a work item is off queue, its high bits point to the last diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 6c28b61bb690..3735fa0a6784 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -35,6 +35,10 @@ #define SEEK_HOLE 4 /* seek to the next hole */ #define SEEK_MAX SEEK_HOLE +#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */ +#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */ +#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */ + struct fstrim_range { __u64 start; __u64 len; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index 60bb2f9f7b74..b77cf381a44b 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -93,6 +93,9 @@ * * 7.22 * - add FUSE_ASYNC_DIO + * + * 7.23 + * - add FUSE_RENAME2 request */ #ifndef _LINUX_FUSE_H @@ -343,6 +346,7 @@ enum fuse_opcode { FUSE_BATCH_FORGET = 42, FUSE_FALLOCATE = 43, FUSE_READDIRPLUS = 44, + FUSE_RENAME2 = 45, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -421,6 +425,12 @@ struct fuse_rename_in { uint64_t newdir; }; +struct fuse_rename2_in { + uint64_t newdir; + uint32_t flags; + uint32_t padding; +}; + struct fuse_link_in { uint64_t oldnodeid; }; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 5f663c755a9f..d8694100d607 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -514,9 +514,6 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { - if (cp == root_cs) - continue; - /* skip the whole subtree if @cp doesn't have any CPU */ if (cpumask_empty(cp->cpus_allowed)) { pos_css = css_rightmost_descendant(pos_css); diff --git a/kernel/events/core.c b/kernel/events/core.c index 5fbeded315e3..2c1e365be3f3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4232,6 +4232,13 @@ static void perf_pending_event(struct irq_work *entry) { struct perf_event *event = container_of(entry, struct perf_event, pending); + int rctx; + + rctx = perf_swevent_get_recursion_context(); + /* + * If we 'fail' here, that's OK, it means recursion is already disabled + * and we won't recurse 'further'. + */ if (event->pending_disable) { event->pending_disable = 0; @@ -4242,6 +4249,9 @@ static void perf_pending_event(struct irq_work *entry) event->pending_wakeup = 0; perf_event_wakeup(event); } + + if (rctx >= 0) + perf_swevent_put_recursion_context(rctx); } /* diff --git a/kernel/futex.c b/kernel/futex.c index 5ba3c0fc4d19..e7f392e56be4 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -782,94 +782,91 @@ void exit_pi_state_list(struct task_struct *curr) * [10] There is no transient state which leaves owner and user space * TID out of sync. */ -static int -lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, - union futex_key *key, struct futex_pi_state **ps) + +/* + * Validate that the existing waiter has a pi_state and sanity check + * the pi_state against the user space value. If correct, attach to + * it. + */ +static int attach_to_pi_state(u32 uval, struct futex_pi_state *pi_state, + struct futex_pi_state **ps) { - struct futex_pi_state *pi_state = NULL; - struct futex_q *this, *next; - struct task_struct *p; pid_t pid = uval & FUTEX_TID_MASK; - plist_for_each_entry_safe(this, next, &hb->chain, list) { - if (match_futex(&this->key, key)) { - /* - * Sanity check the waiter before increasing - * the refcount and attaching to it. - */ - pi_state = this->pi_state; - /* - * Userspace might have messed up non-PI and - * PI futexes [3] - */ - if (unlikely(!pi_state)) - return -EINVAL; + /* + * Userspace might have messed up non-PI and PI futexes [3] + */ + if (unlikely(!pi_state)) + return -EINVAL; - WARN_ON(!atomic_read(&pi_state->refcount)); + WARN_ON(!atomic_read(&pi_state->refcount)); + /* + * Handle the owner died case: + */ + if (uval & FUTEX_OWNER_DIED) { + /* + * exit_pi_state_list sets owner to NULL and wakes the + * topmost waiter. The task which acquires the + * pi_state->rt_mutex will fixup owner. + */ + if (!pi_state->owner) { /* - * Handle the owner died case: + * No pi state owner, but the user space TID + * is not 0. Inconsistent state. [5] */ - if (uval & FUTEX_OWNER_DIED) { - /* - * exit_pi_state_list sets owner to NULL and - * wakes the topmost waiter. The task which - * acquires the pi_state->rt_mutex will fixup - * owner. - */ - if (!pi_state->owner) { - /* - * No pi state owner, but the user - * space TID is not 0. Inconsistent - * state. [5] - */ - if (pid) - return -EINVAL; - /* - * Take a ref on the state and - * return. [4] - */ - goto out_state; - } - - /* - * If TID is 0, then either the dying owner - * has not yet executed exit_pi_state_list() - * or some waiter acquired the rtmutex in the - * pi state, but did not yet fixup the TID in - * user space. - * - * Take a ref on the state and return. [6] - */ - if (!pid) - goto out_state; - } else { - /* - * If the owner died bit is not set, - * then the pi_state must have an - * owner. [7] - */ - if (!pi_state->owner) - return -EINVAL; - } - + if (pid) + return -EINVAL; /* - * Bail out if user space manipulated the - * futex value. If pi state exists then the - * owner TID must be the same as the user - * space TID. [9/10] + * Take a ref on the state and return success. [4] */ - if (pid != task_pid_vnr(pi_state->owner)) - return -EINVAL; - - out_state: - atomic_inc(&pi_state->refcount); - *ps = pi_state; - return 0; + goto out_state; } + + /* + * If TID is 0, then either the dying owner has not + * yet executed exit_pi_state_list() or some waiter + * acquired the rtmutex in the pi state, but did not + * yet fixup the TID in user space. + * + * Take a ref on the state and return success. [6] + */ + if (!pid) + goto out_state; + } else { + /* + * If the owner died bit is not set, then the pi_state + * must have an owner. [7] + */ + if (!pi_state->owner) + return -EINVAL; } /* + * Bail out if user space manipulated the futex value. If pi + * state exists then the owner TID must be the same as the + * user space TID. [9/10] + */ + if (pid != task_pid_vnr(pi_state->owner)) + return -EINVAL; +out_state: + atomic_inc(&pi_state->refcount); + *ps = pi_state; + return 0; +} + +/* + * Lookup the task for the TID provided from user space and attach to + * it after doing proper sanity checks. + */ +static int attach_to_pi_owner(u32 uval, union futex_key *key, + struct futex_pi_state **ps) +{ + pid_t pid = uval & FUTEX_TID_MASK; + struct futex_pi_state *pi_state; + struct task_struct *p; + + /* * We are the first waiter - try to look up the real owner and attach * the new pi_state to it, but bail out when TID = 0 [1] */ @@ -910,7 +907,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, pi_state = alloc_pi_state(); /* - * Initialize the pi_mutex in locked state and make 'p' + * Initialize the pi_mutex in locked state and make @p * the owner of it: */ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p); @@ -930,6 +927,36 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return 0; } +static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, + union futex_key *key, struct futex_pi_state **ps) +{ + struct futex_q *match = futex_top_waiter(hb, key); + + /* + * If there is a waiter on that futex, validate it and + * attach to the pi_state when the validation succeeds. + */ + if (match) + return attach_to_pi_state(uval, match->pi_state, ps); + + /* + * We are the first waiter - try to look up the owner based on + * @uval and attach to it. + */ + return attach_to_pi_owner(uval, key, ps); +} + +static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) +{ + u32 uninitialized_var(curval); + + if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) + return -EFAULT; + + /*If user space value changed, let the caller retry */ + return curval != uval ? -EAGAIN : 0; +} + /** * futex_lock_pi_atomic() - Atomic work required to acquire a pi aware futex * @uaddr: the pi futex user address @@ -953,113 +980,69 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, struct futex_pi_state **ps, struct task_struct *task, int set_waiters) { - int lock_taken, ret, force_take = 0; - u32 uval, newval, curval, vpid = task_pid_vnr(task); - -retry: - ret = lock_taken = 0; + u32 uval, newval, vpid = task_pid_vnr(task); + struct futex_q *match; + int ret; /* - * To avoid races, we attempt to take the lock here again - * (by doing a 0 -> TID atomic cmpxchg), while holding all - * the locks. It will most likely not succeed. + * Read the user space value first so we can validate a few + * things before proceeding further. */ - newval = vpid; - if (set_waiters) - newval |= FUTEX_WAITERS; - - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, 0, newval))) + if (get_futex_value_locked(&uval, uaddr)) return -EFAULT; /* * Detect deadlocks. */ - if ((unlikely((curval & FUTEX_TID_MASK) == vpid))) + if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) return -EDEADLK; /* - * Surprise - we got the lock, but we do not trust user space at all. - */ - if (unlikely(!curval)) { - /* - * We verify whether there is kernel state for this - * futex. If not, we can safely assume, that the 0 -> - * TID transition is correct. If state exists, we do - * not bother to fixup the user space state as it was - * corrupted already. - */ - return futex_top_waiter(hb, key) ? -EINVAL : 1; - } - - uval = curval; - - /* - * Set the FUTEX_WAITERS flag, so the owner will know it has someone - * to wake at the next unlock. + * Lookup existing state first. If it exists, try to attach to + * its pi_state. */ - newval = curval | FUTEX_WAITERS; + match = futex_top_waiter(hb, key); + if (match) + return attach_to_pi_state(uval, match->pi_state, ps); /* - * Should we force take the futex? See below. + * No waiter and user TID is 0. We are here because the + * waiters or the owner died bit is set or called from + * requeue_cmp_pi or for whatever reason something took the + * syscall. */ - if (unlikely(force_take)) { + if (!(uval & FUTEX_TID_MASK)) { /* - * Keep the OWNER_DIED and the WAITERS bit and set the - * new TID value. + * We take over the futex. No other waiters and the user space + * TID is 0. We preserve the owner died bit. */ - newval = (curval & ~FUTEX_TID_MASK) | vpid; - force_take = 0; - lock_taken = 1; - } + newval = uval & FUTEX_OWNER_DIED; + newval |= vpid; - if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) - return -EFAULT; - if (unlikely(curval != uval)) - goto retry; + /* The futex requeue_pi code can enforce the waiters bit */ + if (set_waiters) + newval |= FUTEX_WAITERS; + + ret = lock_pi_update_atomic(uaddr, uval, newval); + /* If the take over worked, return 1 */ + return ret < 0 ? ret : 1; + } /* - * We took the lock due to forced take over. + * First waiter. Set the waiters bit before attaching ourself to + * the owner. If owner tries to unlock, it will be forced into + * the kernel and blocked on hb->lock. */ - if (unlikely(lock_taken)) - return 1; - + newval = uval | FUTEX_WAITERS; + ret = lock_pi_update_atomic(uaddr, uval, newval); + if (ret) + return ret; /* - * We dont have the lock. Look up the PI state (or create it if - * we are the first waiter): + * If the update of the user space value succeeded, we try to + * attach to the owner. If that fails, no harm done, we only + * set the FUTEX_WAITERS bit in the user space variable. */ - ret = lookup_pi_state(uval, hb, key, ps); - - if (unlikely(ret)) { - switch (ret) { - case -ESRCH: - /* - * We failed to find an owner for this - * futex. So we have no pi_state to block - * on. This can happen in two cases: - * - * 1) The owner died - * 2) A stale FUTEX_WAITERS bit - * - * Re-read the futex value. - */ - if (get_futex_value_locked(&curval, uaddr)) - return -EFAULT; - - /* - * If the owner died or we have a stale - * WAITERS bit the owner TID in the user space - * futex is 0. - */ - if (!(curval & FUTEX_TID_MASK)) { - force_take = 1; - goto retry; - } - default: - break; - } - } - - return ret; + return attach_to_pi_owner(uval, key, ps); } /** @@ -1176,22 +1159,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) return 0; } -static int unlock_futex_pi(u32 __user *uaddr, u32 uval) -{ - u32 uninitialized_var(oldval); - - /* - * There is no waiter, so we unlock the futex. The owner died - * bit has not to be preserved here. We are the owner: - */ - if (cmpxchg_futex_value_locked(&oldval, uaddr, uval, 0)) - return -EFAULT; - if (oldval != uval) - return -EAGAIN; - - return 0; -} - /* * Express the locking dependencies for lockdep: */ @@ -1649,7 +1616,12 @@ retry_private: goto retry; goto out; case -EAGAIN: - /* The owner was exiting, try again. */ + /* + * Two reasons for this: + * - Owner is exiting and we just wait for the + * exit to complete. + * - The user space value changed. + */ double_unlock_hb(hb1, hb2); hb_waiters_dec(hb2); put_futex_key(&key2); @@ -1708,7 +1680,7 @@ retry_private: this->pi_state = pi_state; ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, this->rt_waiter, - this->task, 1); + this->task); if (ret == 1) { /* We got the lock. */ requeue_pi_wake_futex(this, &key2, hb2); @@ -2316,8 +2288,10 @@ retry_private: goto uaddr_faulted; case -EAGAIN: /* - * Task is exiting and we just wait for the - * exit to complete. + * Two reasons for this: + * - Task is exiting and we just wait for the + * exit to complete. + * - The user space value changed. */ queue_unlock(hb); put_futex_key(&q.key); @@ -2337,9 +2311,9 @@ retry_private: /* * Block on the PI mutex: */ - if (!trylock) - ret = rt_mutex_timed_lock(&q.pi_state->pi_mutex, to, 1); - else { + if (!trylock) { + ret = rt_mutex_timed_futex_lock(&q.pi_state->pi_mutex, to); + } else { ret = rt_mutex_trylock(&q.pi_state->pi_mutex); /* Fixup the trylock return value: */ ret = ret ? 0 : -EWOULDBLOCK; @@ -2401,10 +2375,10 @@ uaddr_faulted: */ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) { - struct futex_hash_bucket *hb; - struct futex_q *this, *next; + u32 uninitialized_var(curval), uval, vpid = task_pid_vnr(current); union futex_key key = FUTEX_KEY_INIT; - u32 uval, vpid = task_pid_vnr(current); + struct futex_hash_bucket *hb; + struct futex_q *match; int ret; retry: @@ -2417,57 +2391,47 @@ retry: return -EPERM; ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, VERIFY_WRITE); - if (unlikely(ret != 0)) - goto out; + if (ret) + return ret; hb = hash_futex(&key); spin_lock(&hb->lock); /* - * To avoid races, try to do the TID -> 0 atomic transition - * again. If it succeeds then we can return without waking - * anyone else up. We only try this if neither the waiters nor - * the owner died bit are set. - */ - if (!(uval & ~FUTEX_TID_MASK) && - cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) - goto pi_faulted; - /* - * Rare case: we managed to release the lock atomically, - * no need to wake anyone else up: - */ - if (unlikely(uval == vpid)) - goto out_unlock; - - /* - * Ok, other tasks may need to be woken up - check waiters - * and do the wakeup if necessary: + * Check waiters first. We do not trust user space values at + * all and we at least want to know if user space fiddled + * with the futex value instead of blindly unlocking. */ - plist_for_each_entry_safe(this, next, &hb->chain, list) { - if (!match_futex (&this->key, &key)) - continue; - ret = wake_futex_pi(uaddr, uval, this); + match = futex_top_waiter(hb, &key); + if (match) { + ret = wake_futex_pi(uaddr, uval, match); /* - * The atomic access to the futex value - * generated a pagefault, so retry the - * user-access and the wakeup: + * The atomic access to the futex value generated a + * pagefault, so retry the user-access and the wakeup: */ if (ret == -EFAULT) goto pi_faulted; goto out_unlock; } + /* - * No waiters - kernel unlocks the futex: + * We have no kernel internal state, i.e. no waiters in the + * kernel. Waiters which are about to queue themselves are stuck + * on hb->lock. So we can safely ignore them. We do neither + * preserve the WAITERS bit not the OWNER_DIED one. We are the + * owner. */ - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, 0)) goto pi_faulted; + /* + * If uval has changed, let user space handle it. + */ + ret = (curval == uval) ? 0 : -EAGAIN; + out_unlock: spin_unlock(&hb->lock); put_futex_key(&key); - -out: return ret; pi_faulted: @@ -2703,7 +2667,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ WARN_ON(!q.pi_state); pi_mutex = &q.pi_state->pi_mutex; - ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); + ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter); debug_rt_mutex_free_waiter(&rt_waiter); spin_lock(&hb2->lock); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 0731b2f2aeb0..df7c2cb15367 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1757,6 +1757,30 @@ static void run_hrtimer_softirq(struct softirq_action *h) } /* + * Called from timer softirq every jiffy, expire hrtimers: + * + * For HRT its the fall back code to run the softirq in the timer + * softirq context in case the hrtimer initialization failed or has + * not been done yet. + */ +void hrtimer_run_pending(void) +{ + if (hrtimer_hres_active()) + return; + + /* + * This _is_ ugly: We have to check in the softirq context, + * whether we can switch to highres and / or nohz mode. The + * clocksource switch happens in the timer interrupt with + * xtime_lock held. Notification from there only sets the + * check bit in the tick_oneshot code, otherwise we might + * deadlock vs. xtime_lock. + */ + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) + hrtimer_switch_to_hres(); +} + +/* * Called from hardirq context every jiffy */ void hrtimer_run_queues(void) @@ -1769,13 +1793,6 @@ void hrtimer_run_queues(void) if (hrtimer_hres_active()) return; - /* - * Check whether we can switch to highres mode. - */ - if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()) - && hrtimer_switch_to_hres()) - return; - for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { base = &cpu_base->clock_base[index]; if (!timerqueue_getnext(&base->active)) diff --git a/kernel/locking/rt.c b/kernel/locking/rt.c index 90b8ba03e2a4..9a972e996185 100644 --- a/kernel/locking/rt.c +++ b/kernel/locking/rt.c @@ -98,7 +98,7 @@ int __lockfunc _mutex_lock_interruptible(struct mutex *lock) int ret; mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - ret = rt_mutex_lock_interruptible(&lock->lock, 0); + ret = rt_mutex_lock_interruptible(&lock->lock); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); return ret; @@ -110,7 +110,7 @@ int __lockfunc _mutex_lock_killable(struct mutex *lock) int ret; mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); - ret = rt_mutex_lock_killable(&lock->lock, 0); + ret = rt_mutex_lock_killable(&lock->lock); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); return ret; @@ -137,7 +137,7 @@ int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass int ret; mutex_acquire_nest(&lock->dep_map, subclass, 0, NULL, _RET_IP_); - ret = rt_mutex_lock_interruptible(&lock->lock, 0); + ret = rt_mutex_lock_interruptible(&lock->lock); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); return ret; @@ -149,7 +149,7 @@ int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) int ret; mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); - ret = rt_mutex_lock_killable(&lock->lock, 0); + ret = rt_mutex_lock_killable(&lock->lock); if (ret) mutex_release(&lock->dep_map, 1, _RET_IP_); return ret; @@ -322,7 +322,8 @@ EXPORT_SYMBOL(rt_up_write); void rt_up_read(struct rw_semaphore *rwsem) { rwsem_release(&rwsem->dep_map, 1, _RET_IP_); - rt_mutex_unlock(&rwsem->lock); + if (--rwsem->read_depth == 0) + rt_mutex_unlock(&rwsem->lock); } EXPORT_SYMBOL(rt_up_read); @@ -333,6 +334,7 @@ EXPORT_SYMBOL(rt_up_read); void rt_downgrade_write(struct rw_semaphore *rwsem) { BUG_ON(rt_mutex_owner(&rwsem->lock) != current); + rwsem->read_depth = 1; } EXPORT_SYMBOL(rt_downgrade_write); @@ -370,12 +372,23 @@ EXPORT_SYMBOL(rt_down_write_nested_lock); int rt_down_read_trylock(struct rw_semaphore *rwsem) { - int ret; + struct rt_mutex *lock = &rwsem->lock; + int ret = 1; - ret = rt_mutex_trylock(&rwsem->lock); - if (ret) - rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); + /* + * recursive read locks succeed when current owns the rwsem, + * but not when read_depth == 0 which means that the rwsem is + * write locked. + */ + if (rt_mutex_owner(lock) != current) + ret = rt_mutex_trylock(&rwsem->lock); + else if (!rwsem->read_depth) + ret = 0; + if (ret) { + rwsem->read_depth++; + rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); + } return ret; } EXPORT_SYMBOL(rt_down_read_trylock); @@ -383,7 +396,10 @@ EXPORT_SYMBOL(rt_down_read_trylock); static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) { rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); - rt_mutex_lock(&rwsem->lock); + + if (rt_mutex_owner(&rwsem->lock) != current) + rt_mutex_lock(&rwsem->lock); + rwsem->read_depth++; } void rt_down_read(struct rw_semaphore *rwsem) @@ -408,6 +424,7 @@ void __rt_rwsem_init(struct rw_semaphore *rwsem, const char *name, debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); lockdep_init_map(&rwsem->dep_map, name, key, 0); #endif + rwsem->read_depth = 0; rwsem->lock.save_state = 0; } EXPORT_SYMBOL(__rt_rwsem_init); diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 49b2ed3dced8..62b6cee8ea7f 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -66,12 +66,13 @@ void rt_mutex_debug_task_free(struct task_struct *task) * the deadlock. We print when we return. act_waiter can be NULL in * case of a remove waiter operation. */ -void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *act_waiter, +void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *act_waiter, struct rt_mutex *lock) { struct task_struct *task; - if (!debug_locks || detect || !act_waiter) + if (!debug_locks || chwalk == RT_MUTEX_FULL_CHAINWALK || !act_waiter) return; task = rt_mutex_owner(act_waiter->lock); diff --git a/kernel/locking/rtmutex-debug.h b/kernel/locking/rtmutex-debug.h index ab29b6a22669..d0519c3432b6 100644 --- a/kernel/locking/rtmutex-debug.h +++ b/kernel/locking/rtmutex-debug.h @@ -20,14 +20,15 @@ extern void debug_rt_mutex_unlock(struct rt_mutex *lock); extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner); extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); -extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, +extern void debug_rt_mutex_deadlock(enum rtmutex_chainwalk chwalk, + struct rt_mutex_waiter *waiter, struct rt_mutex *lock); extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); # define debug_rt_mutex_reset_waiter(w) \ do { (w)->deadlock_lock = NULL; } while (0) -static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, - int detect) +static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, + enum rtmutex_chainwalk walk) { return (waiter != NULL); } diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c index 1d96dd0d93c1..a6b605b51b97 100644 --- a/kernel/locking/rtmutex-tester.c +++ b/kernel/locking/rtmutex-tester.c @@ -16,7 +16,7 @@ #include <linux/freezer.h> #include <linux/stat.h> -#include "rtmutex.h" +#include "rtmutex_common.h" #define MAX_RT_TEST_THREADS 8 #define MAX_RT_TEST_MUTEXES 8 @@ -107,7 +107,7 @@ static int handle_op(struct test_thread_data *td, int lockwakeup) td->mutexes[id] = 1; td->event = atomic_add_return(1, &rttest_event); - ret = rt_mutex_lock_interruptible(&mutexes[id], 0); + ret = rt_mutex_lock_interruptible(&mutexes[id]); td->event = atomic_add_return(1, &rttest_event); td->mutexes[id] = ret ? 0 : 4; return ret ? -EINTR : 0; diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 6c40660d1168..ee4e7e747e06 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -329,13 +329,40 @@ static void rt_mutex_wake_waiter(struct rt_mutex_waiter *waiter) } /* + * Deadlock detection is conditional: + * + * If CONFIG_DEBUG_RT_MUTEXES=n, deadlock detection is only conducted + * if the detect argument is == RT_MUTEX_FULL_CHAINWALK. + * + * If CONFIG_DEBUG_RT_MUTEXES=y, deadlock detection is always + * conducted independent of the detect argument. + * + * If the waiter argument is NULL this indicates the deboost path and + * deadlock detection is disabled independent of the detect argument + * and the config settings. + */ +static bool rt_mutex_cond_detect_deadlock(struct rt_mutex_waiter *waiter, + enum rtmutex_chainwalk chwalk) +{ + /* + * This is just a wrapper function for the following call, + * because debug_rt_mutex_detect_deadlock() smells like a magic + * debug feature and I wanted to keep the cond function in the + * main source file along with the comments instead of having + * two of the same in the headers. + */ + return debug_rt_mutex_detect_deadlock(waiter, chwalk); +} + +/* * Max number of times we'll walk the boosting chain: */ int max_lock_depth = 1024; static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) { - return p->pi_blocked_on ? p->pi_blocked_on->lock : NULL; + return rt_mutex_real_waiter(p->pi_blocked_on) ? + p->pi_blocked_on->lock : NULL; } /* @@ -358,21 +385,65 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p) * @top_task: the current top waiter * * Returns 0 or -EDEADLK. + * + * Chain walk basics and protection scope + * + * [R] refcount on task + * [P] task->pi_lock held + * [L] rtmutex->wait_lock held + * + * Step Description Protected by + * function arguments: + * @task [R] + * @orig_lock if != NULL @top_task is blocked on it + * @next_lock Unprotected. Cannot be + * dereferenced. Only used for + * comparison. + * @orig_waiter if != NULL @top_task is blocked on it + * @top_task current, or in case of proxy + * locking protected by calling + * code + * again: + * loop_sanity_check(); + * retry: + * [1] lock(task->pi_lock); [R] acquire [P] + * [2] waiter = task->pi_blocked_on; [P] + * [3] check_exit_conditions_1(); [P] + * [4] lock = waiter->lock; [P] + * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] + * unlock(task->pi_lock); release [P] + * goto retry; + * } + * [6] check_exit_conditions_2(); [P] + [L] + * [7] requeue_lock_waiter(lock, waiter); [P] + [L] + * [8] unlock(task->pi_lock); release [P] + * put_task_struct(task); release [R] + * [9] check_exit_conditions_3(); [L] + * [10] task = owner(lock); [L] + * get_task_struct(task); [L] acquire [R] + * lock(task->pi_lock); [L] acquire [P] + * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] + * [12] check_exit_conditions_4(); [P] + [L] + * [13] unlock(task->pi_lock); release [P] + * unlock(lock->wait_lock); release [L] + * goto again; */ static int rt_mutex_adjust_prio_chain(struct task_struct *task, - int deadlock_detect, + enum rtmutex_chainwalk chwalk, struct rt_mutex *orig_lock, struct rt_mutex *next_lock, struct rt_mutex_waiter *orig_waiter, struct task_struct *top_task) { - struct rt_mutex *lock; struct rt_mutex_waiter *waiter, *top_waiter = orig_waiter; - int detect_deadlock, ret = 0, depth = 0; + struct rt_mutex_waiter *prerequeue_top_waiter; + int ret = 0, depth = 0; + struct rt_mutex *lock; + bool detect_deadlock; unsigned long flags; + bool requeue = true; - detect_deadlock = debug_rt_mutex_detect_deadlock(orig_waiter, - deadlock_detect); + detect_deadlock = rt_mutex_cond_detect_deadlock(orig_waiter, chwalk); /* * The (de)boosting is a step by step approach with a lot of @@ -381,6 +452,9 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, * carefully whether things change under us. */ again: + /* + * We limit the lock chain length for each invocation. + */ if (++depth > max_lock_depth) { static int prev_max; @@ -398,13 +472,28 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, return -EDEADLK; } + + /* + * We are fully preemptible here and only hold the refcount on + * @task. So everything can have changed under us since the + * caller or our own code below (goto retry/again) dropped all + * locks. + */ retry: /* - * Task can not go away as we did a get_task() before ! + * [1] Task cannot go away as we did a get_task() before ! */ raw_spin_lock_irqsave(&task->pi_lock, flags); + /* + * [2] Get the waiter on which @task is blocked on. + */ waiter = task->pi_blocked_on; + + /* + * [3] check_exit_conditions_1() protected by task->pi_lock. + */ + /* * Check whether the end of the boosting chain has been * reached or the state of the chain has changed while we @@ -442,20 +531,41 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, goto out_unlock_pi; /* * If deadlock detection is off, we stop here if we - * are not the top pi waiter of the task. + * are not the top pi waiter of the task. If deadlock + * detection is enabled we continue, but stop the + * requeueing in the chain walk. */ - if (!detect_deadlock && top_waiter != task_top_pi_waiter(task)) - goto out_unlock_pi; + if (top_waiter != task_top_pi_waiter(task)) { + if (!detect_deadlock) + goto out_unlock_pi; + else + requeue = false; + } } /* - * When deadlock detection is off then we check, if further - * priority adjustment is necessary. + * If the waiter priority is the same as the task priority + * then there is no further priority adjustment necessary. If + * deadlock detection is off, we stop the chain walk. If its + * enabled we continue, but stop the requeueing in the chain + * walk. */ - if (!detect_deadlock && waiter->prio == task->prio) - goto out_unlock_pi; + if (waiter->prio == task->prio) { + if (!detect_deadlock) + goto out_unlock_pi; + else + requeue = false; + } + /* + * [4] Get the next lock + */ lock = waiter->lock; + /* + * [5] We need to trylock here as we are holding task->pi_lock, + * which is the reverse lock order versus the other rtmutex + * operations. + */ if (!raw_spin_trylock(&lock->wait_lock)) { raw_spin_unlock_irqrestore(&task->pi_lock, flags); cpu_relax(); @@ -463,81 +573,183 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task, } /* + * [6] check_exit_conditions_2() protected by task->pi_lock and + * lock->wait_lock. + * * Deadlock detection. If the lock is the same as the original * lock which caused us to walk the lock chain or if the * current lock is owned by the task which initiated the chain * walk, we detected a deadlock. */ if (lock == orig_lock || rt_mutex_owner(lock) == top_task) { - debug_rt_mutex_deadlock(deadlock_detect, orig_waiter, lock); + debug_rt_mutex_deadlock(chwalk, orig_waiter, lock); raw_spin_unlock(&lock->wait_lock); ret = -EDEADLK; goto out_unlock_pi; } - top_waiter = rt_mutex_top_waiter(lock); + /* + * If we just follow the lock chain for deadlock detection, no + * need to do all the requeue operations. To avoid a truckload + * of conditionals around the various places below, just do the + * minimum chain walk checks. + */ + if (!requeue) { + /* + * No requeue[7] here. Just release @task [8] + */ + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + put_task_struct(task); + + /* + * [9] check_exit_conditions_3 protected by lock->wait_lock. + * If there is no owner of the lock, end of chain. + */ + if (!rt_mutex_owner(lock)) { + raw_spin_unlock(&lock->wait_lock); + return 0; + } + + /* [10] Grab the next task, i.e. owner of @lock */ + task = rt_mutex_owner(lock); + get_task_struct(task); + raw_spin_lock_irqsave(&task->pi_lock, flags); - /* Requeue the waiter */ + /* + * No requeue [11] here. We just do deadlock detection. + * + * [12] Store whether owner is blocked + * itself. Decision is made after dropping the locks + */ + next_lock = task_blocked_on_lock(task); + /* + * Get the top waiter for the next iteration + */ + top_waiter = rt_mutex_top_waiter(lock); + + /* [13] Drop locks */ + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + raw_spin_unlock(&lock->wait_lock); + + /* If owner is not blocked, end of chain. */ + if (!next_lock) + goto out_put_task; + goto again; + } + + /* + * Store the current top waiter before doing the requeue + * operation on @lock. We need it for the boost/deboost + * decision below. + */ + prerequeue_top_waiter = rt_mutex_top_waiter(lock); + + /* [7] Requeue the waiter in the lock waiter list. */ rt_mutex_dequeue(lock, waiter); waiter->prio = task->prio; rt_mutex_enqueue(lock, waiter); - /* Release the task */ + /* [8] Release the task */ raw_spin_unlock_irqrestore(&task->pi_lock, flags); + put_task_struct(task); + + /* + * [9] check_exit_conditions_3 protected by lock->wait_lock. + * + * We must abort the chain walk if there is no lock owner even + * in the dead lock detection case, as we have nothing to + * follow here. This is the end of the chain we are walking. + */ if (!rt_mutex_owner(lock)) { struct rt_mutex_waiter *lock_top_waiter; /* - * If the requeue above changed the top waiter, then we need - * to wake the new top waiter up to try to get the lock. + * If the requeue [7] above changed the top waiter, + * then we need to wake the new top waiter up to try + * to get the lock. */ lock_top_waiter = rt_mutex_top_waiter(lock); - if (top_waiter != lock_top_waiter) + if (prerequeue_top_waiter != lock_top_waiter) rt_mutex_wake_waiter(lock_top_waiter); raw_spin_unlock(&lock->wait_lock); - goto out_put_task; + return 0; } - put_task_struct(task); - /* Grab the next task */ + /* [10] Grab the next task, i.e. the owner of @lock */ task = rt_mutex_owner(lock); get_task_struct(task); raw_spin_lock_irqsave(&task->pi_lock, flags); + /* [11] requeue the pi waiters if necessary */ if (waiter == rt_mutex_top_waiter(lock)) { - /* Boost the owner */ - rt_mutex_dequeue_pi(task, top_waiter); + /* + * The waiter became the new top (highest priority) + * waiter on the lock. Replace the previous top waiter + * in the owner tasks pi waiters list with this waiter + * and adjust the priority of the owner. + */ + rt_mutex_dequeue_pi(task, prerequeue_top_waiter); rt_mutex_enqueue_pi(task, waiter); __rt_mutex_adjust_prio(task); - } else if (top_waiter == waiter) { - /* Deboost the owner */ + } else if (prerequeue_top_waiter == waiter) { + /* + * The waiter was the top waiter on the lock, but is + * no longer the top prority waiter. Replace waiter in + * the owner tasks pi waiters list with the new top + * (highest priority) waiter and adjust the priority + * of the owner. + * The new top waiter is stored in @waiter so that + * @waiter == @top_waiter evaluates to true below and + * we continue to deboost the rest of the chain. + */ rt_mutex_dequeue_pi(task, waiter); waiter = rt_mutex_top_waiter(lock); rt_mutex_enqueue_pi(task, waiter); __rt_mutex_adjust_prio(task); + } else { + /* + * Nothing changed. No need to do any priority + * adjustment. + */ } /* + * [12] check_exit_conditions_4() protected by task->pi_lock + * and lock->wait_lock. The actual decisions are made after we + * dropped the locks. + * * Check whether the task which owns the current lock is pi * blocked itself. If yes we store a pointer to the lock for * the lock chain change detection above. After we dropped * task->pi_lock next_lock cannot be dereferenced anymore. */ next_lock = task_blocked_on_lock(task); + /* + * Store the top waiter of @lock for the end of chain walk + * decision below. + */ + top_waiter = rt_mutex_top_waiter(lock); + /* [13] Drop the locks */ raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - top_waiter = rt_mutex_top_waiter(lock); raw_spin_unlock(&lock->wait_lock); /* + * Make the actual exit decisions [12], based on the stored + * values. + * * We reached the end of the lock chain. Stop right here. No * point to go back just to figure that out. */ if (!next_lock) goto out_put_task; + /* + * If the current waiter is not the top waiter on the lock, + * then we can stop the chain walk here if we are not in full + * deadlock detection mode. + */ if (!detect_deadlock && waiter != top_waiter) goto out_put_task; @@ -575,78 +787,120 @@ static inline int lock_is_stealable(struct task_struct *task, * * Must be called with lock->wait_lock held. * - * @lock: the lock to be acquired. - * @task: the task which wants to acquire the lock - * @waiter: the waiter that is queued to the lock's wait list. (could be NULL) + * @lock: The lock to be acquired. + * @task: The task which wants to acquire the lock + * @waiter: The waiter that is queued to the lock's wait list if the + * callsite called task_blocked_on_lock(), otherwise NULL */ static int __try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, struct rt_mutex_waiter *waiter, int mode) { + unsigned long flags; + /* - * We have to be careful here if the atomic speedups are - * enabled, such that, when - * - no other waiter is on the lock - * - the lock has been released since we did the cmpxchg - * the lock can be released or taken while we are doing the - * checks and marking the lock with RT_MUTEX_HAS_WAITERS. + * Before testing whether we can acquire @lock, we set the + * RT_MUTEX_HAS_WAITERS bit in @lock->owner. This forces all + * other tasks which try to modify @lock into the slow path + * and they serialize on @lock->wait_lock. + * + * The RT_MUTEX_HAS_WAITERS bit can have a transitional state + * as explained at the top of this file if and only if: * - * The atomic acquire/release aware variant of - * mark_rt_mutex_waiters uses a cmpxchg loop. After setting - * the WAITERS bit, the atomic release / acquire can not - * happen anymore and lock->wait_lock protects us from the - * non-atomic case. + * - There is a lock owner. The caller must fixup the + * transient state if it does a trylock or leaves the lock + * function due to a signal or timeout. * - * Note, that this might set lock->owner = - * RT_MUTEX_HAS_WAITERS in the case the lock is not contended - * any more. This is fixed up when we take the ownership. - * This is the transitional state explained at the top of this file. + * - @task acquires the lock and there are no other + * waiters. This is undone in rt_mutex_set_owner(@task) at + * the end of this function. */ mark_rt_mutex_waiters(lock); + /* + * If @lock has an owner, give up. + */ if (rt_mutex_owner(lock)) return 0; /* - * It will get the lock because of one of these conditions: - * 1) there is no waiter - * 2) higher priority than waiters - * 3) it is top waiter + * If @waiter != NULL, @task has already enqueued the waiter + * into @lock waiter list. If @waiter == NULL then this is a + * trylock attempt. */ - if (rt_mutex_has_waiters(lock)) { - struct task_struct *pown = rt_mutex_top_waiter(lock)->task; - - if (task != pown && !lock_is_stealable(task, pown, mode)) + if (waiter) { + /* + * If waiter is not the highest priority waiter of + * @lock, give up. + */ + if (waiter != rt_mutex_top_waiter(lock)) return 0; - } - - /* We got the lock. */ - - if (waiter || rt_mutex_has_waiters(lock)) { - unsigned long flags; - struct rt_mutex_waiter *top; - - raw_spin_lock_irqsave(&task->pi_lock, flags); - /* remove the queued waiter. */ - if (waiter) { - rt_mutex_dequeue(lock, waiter); - task->pi_blocked_on = NULL; - } + /* + * We can acquire the lock. Remove the waiter from the + * lock waiters list. + */ + rt_mutex_dequeue(lock, waiter); + } else { /* - * We have to enqueue the top waiter(if it exists) into - * task->pi_waiters list. + * If the lock has waiters already we check whether @task is + * eligible to take over the lock. + * + * If there are no other waiters, @task can acquire + * the lock. @task->pi_blocked_on is NULL, so it does + * not need to be dequeued. */ if (rt_mutex_has_waiters(lock)) { - top = rt_mutex_top_waiter(lock); - rt_mutex_enqueue_pi(task, top); + /* + * If @task->prio is greater than or equal to + * the top waiter priority (kernel view), + * @task lost. + */ + if (task->prio >= rt_mutex_top_waiter(lock)->prio) + return 0; + + /* + * The current top waiter stays enqueued. We + * don't have to change anything in the lock + * waiters order. + */ + } else { + /* + * No waiters. Take the lock without the + * pi_lock dance.@task->pi_blocked_on is NULL + * and we have no waiters to enqueue in @task + * pi waiters list. + */ + goto takeit; } - raw_spin_unlock_irqrestore(&task->pi_lock, flags); } + /* + * Clear @task->pi_blocked_on. Requires protection by + * @task->pi_lock. Redundant operation for the @waiter == NULL + * case, but conditionals are more expensive than a redundant + * store. + */ + raw_spin_lock_irqsave(&task->pi_lock, flags); + task->pi_blocked_on = NULL; + /* + * Finish the lock acquisition. @task is the new owner. If + * other waiters exist we have to insert the highest priority + * waiter into @task->pi_waiters list. + */ + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(task, rt_mutex_top_waiter(lock)); + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + +takeit: + /* We got the lock. */ debug_rt_mutex_lock(lock); + /* + * This either preserves the RT_MUTEX_HAS_WAITERS bit if there + * are still waiters or clears it. + */ rt_mutex_set_owner(lock, task); rt_mutex_deadlock_account_lock(lock, task); @@ -671,7 +925,7 @@ try_to_take_rt_mutex(struct rt_mutex *lock, struct task_struct *task, static int task_blocks_on_rt_mutex(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, - int detect_deadlock) + enum rtmutex_chainwalk chwalk) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; @@ -734,7 +988,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, __rt_mutex_adjust_prio(owner); if (rt_mutex_real_waiter(owner->pi_blocked_on)) chain_walk = 1; - } else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) { + } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { chain_walk = 1; } @@ -759,7 +1013,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, + res = rt_mutex_adjust_prio_chain(owner, chwalk, lock, next_lock, waiter, task); raw_spin_lock(&lock->wait_lock); @@ -821,7 +1075,7 @@ static void wakeup_next_waiter(struct rt_mutex *lock) static void remove_waiter(struct rt_mutex *lock, struct rt_mutex_waiter *waiter) { - int first = (waiter == rt_mutex_top_waiter(lock)); + bool is_top_waiter = (waiter == rt_mutex_top_waiter(lock)); struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex *next_lock = NULL; unsigned long flags; @@ -831,30 +1085,32 @@ static void remove_waiter(struct rt_mutex *lock, current->pi_blocked_on = NULL; raw_spin_unlock_irqrestore(¤t->pi_lock, flags); - if (!owner) + /* + * Only update priority if the waiter was the highest priority + * waiter of the lock and there is an owner to update. + */ + if (!owner || !is_top_waiter) return; - if (first) { - - raw_spin_lock_irqsave(&owner->pi_lock, flags); + raw_spin_lock_irqsave(&owner->pi_lock, flags); - rt_mutex_dequeue_pi(owner, waiter); + rt_mutex_dequeue_pi(owner, waiter); - if (rt_mutex_has_waiters(lock)) { - struct rt_mutex_waiter *next; + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); - next = rt_mutex_top_waiter(lock); - rt_mutex_enqueue_pi(owner, next); - } - __rt_mutex_adjust_prio(owner); + __rt_mutex_adjust_prio(owner); - /* Store the lock on which owner is blocked or NULL */ - if (rt_mutex_real_waiter(owner->pi_blocked_on)) - next_lock = task_blocked_on_lock(owner); + /* Store the lock on which owner is blocked or NULL */ + if (rt_mutex_real_waiter(owner->pi_blocked_on)) + next_lock = task_blocked_on_lock(owner); - raw_spin_unlock_irqrestore(&owner->pi_lock, flags); - } + raw_spin_unlock_irqrestore(&owner->pi_lock, flags); + /* + * Don't walk the chain, if the owner task is not blocked + * itself. + */ if (!next_lock) return; @@ -863,7 +1119,8 @@ static void remove_waiter(struct rt_mutex *lock, raw_spin_unlock(&lock->wait_lock); - rt_mutex_adjust_prio_chain(owner, 0, lock, next_lock, NULL, current); + rt_mutex_adjust_prio_chain(owner, RT_MUTEX_MIN_CHAINWALK, lock, + next_lock, NULL, current); raw_spin_lock(&lock->wait_lock); } @@ -892,7 +1149,8 @@ void rt_mutex_adjust_pi(struct task_struct *task) /* gets dropped in rt_mutex_adjust_prio_chain()! */ get_task_struct(task); raw_spin_unlock_irqrestore(&task->pi_lock, flags); - rt_mutex_adjust_prio_chain(task, 0, NULL, next_lock, NULL, task); + rt_mutex_adjust_prio_chain(task, RT_MUTEX_MIN_CHAINWALK, NULL, + next_lock, NULL, task); } #ifdef CONFIG_PREEMPT_RT_FULL @@ -1405,7 +1663,8 @@ static void ww_mutex_account_lock(struct rt_mutex *lock, static int __sched rt_mutex_slowlock(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock, struct ww_acquire_ctx *ww_ctx) + enum rtmutex_chainwalk chwalk, + struct ww_acquire_ctx *ww_ctx) { struct rt_mutex_waiter waiter; int ret = 0; @@ -1431,16 +1690,24 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, timeout->task = NULL; } - ret = task_blocks_on_rt_mutex(lock, &waiter, current, detect_deadlock); + ret = task_blocks_on_rt_mutex(lock, &waiter, current, chwalk); if (likely(!ret)) ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, ww_ctx); + else if (ww_ctx) { + /* ww_mutex received EDEADLK, let it become EALREADY */ + ret = __mutex_lock_check_stamp(lock, ww_ctx); + BUG_ON(!ret); + } set_current_state(TASK_RUNNING); if (unlikely(ret)) { - remove_waiter(lock, &waiter); - rt_mutex_handle_deadlock(ret, detect_deadlock, &waiter); + if (rt_mutex_has_waiters(lock)) + remove_waiter(lock, &waiter); + /* ww_mutex want to report EDEADLK/EALREADY, let them */ + if (!ww_ctx) + rt_mutex_handle_deadlock(ret, chwalk, &waiter); } else if (ww_ctx) { ww_mutex_account_lock(lock, ww_ctx); } @@ -1465,23 +1732,32 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state, /* * Slow path try-lock function: */ -static inline int -rt_mutex_slowtrylock(struct rt_mutex *lock) +static inline int rt_mutex_slowtrylock(struct rt_mutex *lock) { - int ret = 0; + int ret; + + /* + * If the lock already has an owner we fail to get the lock. + * This can be done without taking the @lock->wait_lock as + * it is only being read, and this is a trylock anyway. + */ + if (rt_mutex_owner(lock)) + return 0; + /* + * The mutex has currently no owner. Lock the wait lock and + * try to acquire the lock. + */ if (!raw_spin_trylock(&lock->wait_lock)) - return ret; + return 0; - if (likely(rt_mutex_owner(lock) != current)) { + ret = try_to_take_rt_mutex(lock, current, NULL); - ret = try_to_take_rt_mutex(lock, current, NULL); - /* - * try_to_take_rt_mutex() sets the lock waiters - * bit unconditionally. Clean this up. - */ - fixup_rt_mutex_waiters(lock); - } + /* + * try_to_take_rt_mutex() sets the lock waiters bit + * unconditionally. Clean this up. + */ + fixup_rt_mutex_waiters(lock); raw_spin_unlock(&lock->wait_lock); @@ -1559,33 +1835,35 @@ rt_mutex_slowunlock(struct rt_mutex *lock) */ static inline int rt_mutex_fastlock(struct rt_mutex *lock, int state, - int detect_deadlock, struct ww_acquire_ctx *ww_ctx, + struct ww_acquire_ctx *ww_ctx, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock, + enum rtmutex_chainwalk chwalk, struct ww_acquire_ctx *ww_ctx)) { - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + if (likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, NULL, detect_deadlock, ww_ctx); + return slowfn(lock, state, NULL, RT_MUTEX_MIN_CHAINWALK, ww_ctx); } static inline int rt_mutex_timed_fastlock(struct rt_mutex *lock, int state, - struct hrtimer_sleeper *timeout, int detect_deadlock, + struct hrtimer_sleeper *timeout, + enum rtmutex_chainwalk chwalk, struct ww_acquire_ctx *ww_ctx, int (*slowfn)(struct rt_mutex *lock, int state, struct hrtimer_sleeper *timeout, - int detect_deadlock, + enum rtmutex_chainwalk chwalk, struct ww_acquire_ctx *ww_ctx)) { - if (!detect_deadlock && likely(rt_mutex_cmpxchg(lock, NULL, current))) { + if (chwalk == RT_MUTEX_MIN_CHAINWALK && + likely(rt_mutex_cmpxchg(lock, NULL, current))) { rt_mutex_deadlock_account_lock(lock, current); return 0; } else - return slowfn(lock, state, timeout, detect_deadlock, ww_ctx); + return slowfn(lock, state, timeout, chwalk, ww_ctx); } static inline int @@ -1618,7 +1896,7 @@ void __sched rt_mutex_lock(struct rt_mutex *lock) { might_sleep(); - rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, 0, NULL, rt_mutex_slowlock); + rt_mutex_fastlock(lock, TASK_UNINTERRUPTIBLE, NULL, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock); @@ -1626,41 +1904,48 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock); * rt_mutex_lock_interruptible - lock a rt_mutex interruptible * * @lock: the rt_mutex to be locked - * @detect_deadlock: deadlock detection on/off * * Returns: * 0 on success * -EINTR when interrupted by a signal - * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ -int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock, - int detect_deadlock) +int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock) { might_sleep(); - return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, - detect_deadlock, NULL, rt_mutex_slowlock); + return rt_mutex_fastlock(lock, TASK_INTERRUPTIBLE, NULL, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible); +/* + * Futex variant with full deadlock detection. + */ +int rt_mutex_timed_futex_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *timeout) +{ + might_sleep(); + + return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, + RT_MUTEX_FULL_CHAINWALK, NULL, + rt_mutex_slowlock); +} + /** * rt_mutex_lock_killable - lock a rt_mutex killable * * @lock: the rt_mutex to be locked - * @detect_deadlock: deadlock detection on/off * * Returns: * 0 on success * -EINTR when interrupted by a signal * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ -int __sched rt_mutex_lock_killable(struct rt_mutex *lock, - int detect_deadlock) +int __sched rt_mutex_lock_killable(struct rt_mutex *lock) { might_sleep(); return rt_mutex_fastlock(lock, TASK_KILLABLE, - detect_deadlock, NULL, rt_mutex_slowlock); + NULL, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); @@ -1671,22 +1956,20 @@ EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); * * @lock: the rt_mutex to be locked * @timeout: timeout structure or NULL (no timeout) - * @detect_deadlock: deadlock detection on/off * * Returns: * 0 on success * -EINTR when interrupted by a signal * -ETIMEDOUT when the timeout expired - * -EDEADLK when the lock would deadlock (when deadlock detection is on) */ int -rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout, - int detect_deadlock) +rt_mutex_timed_lock(struct rt_mutex *lock, struct hrtimer_sleeper *timeout) { might_sleep(); return rt_mutex_timed_fastlock(lock, TASK_INTERRUPTIBLE, timeout, - detect_deadlock, NULL, rt_mutex_slowlock); + RT_MUTEX_MIN_CHAINWALK, + NULL, rt_mutex_slowlock); } EXPORT_SYMBOL_GPL(rt_mutex_timed_lock); @@ -1791,7 +2074,6 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, * @lock: the rt_mutex to take * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare - * @detect_deadlock: perform deadlock detection (1) or not (0) * * Returns: * 0 - task blocked on lock @@ -1802,7 +2084,7 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock, */ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task, int detect_deadlock) + struct task_struct *task) { int ret; @@ -1843,7 +2125,8 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock, #endif /* We enforce deadlock detection for futexes */ - ret = task_blocks_on_rt_mutex(lock, waiter, task, 1); + ret = task_blocks_on_rt_mutex(lock, waiter, task, + RT_MUTEX_FULL_CHAINWALK); if (ret && !rt_mutex_owner(lock)) { /* @@ -1889,22 +2172,20 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) * rt_mutex_finish_proxy_lock() - Complete lock acquisition * @lock: the rt_mutex we were woken on * @to: the timeout, null if none. hrtimer should already have - * been started. + * been started. * @waiter: the pre-initialized rt_mutex_waiter - * @detect_deadlock: perform deadlock detection (1) or not (0) * * Complete the lock acquisition started our behalf by another thread. * * Returns: * 0 - success - * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK + * <0 - error, one of -EINTR, -ETIMEDOUT * * Special API call for PI-futex requeue support */ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock) + struct rt_mutex_waiter *waiter) { int ret; @@ -1964,7 +2245,7 @@ __ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ww_c might_sleep(); - mutex_acquire(&lock->base.dep_map, 0, 0, _RET_IP_); + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); ret = rt_mutex_slowlock(&lock->base.lock, TASK_INTERRUPTIBLE, NULL, 0, ww_ctx); if (ret) mutex_release(&lock->base.dep_map, 1, _RET_IP_); @@ -1982,8 +2263,7 @@ __ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx) might_sleep(); - mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, - _RET_IP_); + mutex_acquire_nest(&lock->base.dep_map, 0, 0, &ww_ctx->dep_map, _RET_IP_); ret = rt_mutex_slowlock(&lock->base.lock, TASK_UNINTERRUPTIBLE, NULL, 0, ww_ctx); if (ret) mutex_release(&lock->base.dep_map, 1, _RET_IP_); @@ -1996,11 +2276,13 @@ EXPORT_SYMBOL_GPL(__ww_mutex_lock); void __sched ww_mutex_unlock(struct ww_mutex *lock) { + int nest = !!lock->ctx; + /* * The unlocking fastpath is the 0->1 transition from 'locked' * into 'unlocked' state: */ - if (lock->ctx) { + if (nest) { #ifdef CONFIG_DEBUG_MUTEXES DEBUG_LOCKS_WARN_ON(!lock->ctx->acquired); #endif @@ -2009,7 +2291,7 @@ void __sched ww_mutex_unlock(struct ww_mutex *lock) lock->ctx = NULL; } - mutex_release(&lock->base.dep_map, 1, _RET_IP_); + mutex_release(&lock->base.dep_map, nest, _RET_IP_); rt_mutex_unlock(&lock->base.lock); } EXPORT_SYMBOL(ww_mutex_unlock); diff --git a/kernel/locking/rtmutex.h b/kernel/locking/rtmutex.h index f6a1f3c133b1..c4060584c407 100644 --- a/kernel/locking/rtmutex.h +++ b/kernel/locking/rtmutex.h @@ -22,10 +22,15 @@ #define debug_rt_mutex_init(m, n) do { } while (0) #define debug_rt_mutex_deadlock(d, a ,l) do { } while (0) #define debug_rt_mutex_print_deadlock(w) do { } while (0) -#define debug_rt_mutex_detect_deadlock(w,d) (d) #define debug_rt_mutex_reset_waiter(w) do { } while (0) static inline void rt_mutex_print_deadlock(struct rt_mutex_waiter *w) { WARN(1, "rtmutex deadlock detected\n"); } + +static inline bool debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *w, + enum rtmutex_chainwalk walk) +{ + return walk == RT_MUTEX_FULL_CHAINWALK; +} diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index ac636d37ec32..c6dcda5e53af 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -103,6 +103,21 @@ static inline struct task_struct *rt_mutex_owner(struct rt_mutex *lock) } /* + * Constants for rt mutex functions which have a selectable deadlock + * detection. + * + * RT_MUTEX_MIN_CHAINWALK: Stops the lock chain walk when there are + * no further PI adjustments to be made. + * + * RT_MUTEX_FULL_CHAINWALK: Invoke deadlock detection with a full + * walk of the lock chain. + */ +enum rtmutex_chainwalk { + RT_MUTEX_MIN_CHAINWALK, + RT_MUTEX_FULL_CHAINWALK, +}; + +/* * PI-futex support (proxy locking functions, etc.): */ #define PI_WAKEUP_INPROGRESS ((struct rt_mutex_waiter *) 1) @@ -115,12 +130,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, struct task_struct *proxy_owner); extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task, - int detect_deadlock); + struct task_struct *task); extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, struct hrtimer_sleeper *to, - struct rt_mutex_waiter *waiter, - int detect_deadlock); + struct rt_mutex_waiter *waiter); +extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to); #ifdef CONFIG_DEBUG_RT_MUTEXES # include "rtmutex-debug.h" diff --git a/kernel/printk/console_cmdline.h b/kernel/printk/console_cmdline.h index cbd69d842341..2ca4a8b5fe57 100644 --- a/kernel/printk/console_cmdline.h +++ b/kernel/printk/console_cmdline.h @@ -3,7 +3,7 @@ struct console_cmdline { - char name[8]; /* Name of the driver */ + char name[16]; /* Name of the driver */ int index; /* Minor dev. to use */ char *options; /* Options for the driver */ #ifdef CONFIG_A11Y_BRAILLE_CONSOLE diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 376df3d58dd2..a9be229cb5af 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2370,6 +2370,7 @@ void register_console(struct console *newcon) for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && c->name[0]; i++, c++) { + BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index b14a512da520..d8374f999e9b 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -13,7 +13,7 @@ endif obj-y += core.o proc.o clock.o cputime.o obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o -obj-y += wait.o wait-simple.o completion.o +obj-y += wait.o wait-simple.o work-simple.o completion.o obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o obj-$(CONFIG_SCHEDSTATS) += stats.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 708690665e63..ebc7f7820108 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3208,6 +3208,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio) } else { if (dl_prio(oldprio)) p->dl.dl_boosted = 0; + if (rt_prio(oldprio)) + p->rt.timeout = 0; p->sched_class = &fair_sched_class; } diff --git a/kernel/sched/work-simple.c b/kernel/sched/work-simple.c new file mode 100644 index 000000000000..c996f755dba6 --- /dev/null +++ b/kernel/sched/work-simple.c @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2014 BMW Car IT GmbH, Daniel Wagner daniel.wagner@bmw-carit.de + * + * Provides a framework for enqueuing callbacks from irq context + * PREEMPT_RT_FULL safe. The callbacks are executed in kthread context. + */ + +#include <linux/wait-simple.h> +#include <linux/work-simple.h> +#include <linux/kthread.h> +#include <linux/slab.h> +#include <linux/spinlock.h> + +#define SWORK_EVENT_PENDING (1 << 0) + +static DEFINE_MUTEX(worker_mutex); +static struct sworker *glob_worker; + +struct sworker { + struct list_head events; + struct swait_head wq; + + raw_spinlock_t lock; + + struct task_struct *task; + int refs; +}; + +static bool swork_readable(struct sworker *worker) +{ + bool r; + + if (kthread_should_stop()) + return true; + + raw_spin_lock_irq(&worker->lock); + r = !list_empty(&worker->events); + raw_spin_unlock_irq(&worker->lock); + + return r; +} + +static int swork_kthread(void *arg) +{ + struct sworker *worker = arg; + + for (;;) { + swait_event_interruptible(worker->wq, + swork_readable(worker)); + if (kthread_should_stop()) + break; + + raw_spin_lock_irq(&worker->lock); + while (!list_empty(&worker->events)) { + struct swork_event *sev; + + sev = list_first_entry(&worker->events, + struct swork_event, item); + list_del(&sev->item); + raw_spin_unlock_irq(&worker->lock); + + WARN_ON_ONCE(!test_and_clear_bit(SWORK_EVENT_PENDING, + &sev->flags)); + sev->func(sev); + raw_spin_lock_irq(&worker->lock); + } + raw_spin_unlock_irq(&worker->lock); + } + return 0; +} + +static struct sworker *swork_create(void) +{ + struct sworker *worker; + + worker = kzalloc(sizeof(*worker), GFP_KERNEL); + if (!worker) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&worker->events); + raw_spin_lock_init(&worker->lock); + init_swait_head(&worker->wq); + + worker->task = kthread_run(swork_kthread, worker, "kswork"); + if (IS_ERR(worker->task)) { + kfree(worker); + return ERR_PTR(-ENOMEM); + } + + return worker; +} + +static void swork_destroy(struct sworker *worker) +{ + kthread_stop(worker->task); + + WARN_ON(!list_empty(&worker->events)); + kfree(worker); +} + +/** + * swork_queue - queue swork + * + * Returns %false if @work was already on a queue, %true otherwise. + * + * The work is queued and processed on a random CPU + */ +bool swork_queue(struct swork_event *sev) +{ + unsigned long flags; + + if (test_and_set_bit(SWORK_EVENT_PENDING, &sev->flags)) + return false; + + raw_spin_lock_irqsave(&glob_worker->lock, flags); + list_add_tail(&sev->item, &glob_worker->events); + raw_spin_unlock_irqrestore(&glob_worker->lock, flags); + + swait_wake(&glob_worker->wq); + return true; +} +EXPORT_SYMBOL_GPL(swork_queue); + +/** + * swork_get - get an instance of the sworker + * + * Returns an negative error code if the initialization if the worker did not + * work, %0 otherwise. + * + */ +int swork_get(void) +{ + struct sworker *worker; + + mutex_lock(&worker_mutex); + if (!glob_worker) { + worker = swork_create(); + if (IS_ERR(worker)) { + mutex_unlock(&worker_mutex); + return -ENOMEM; + } + + glob_worker = worker; + } + + glob_worker->refs++; + mutex_unlock(&worker_mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(swork_get); + +/** + * swork_put - puts an instance of the sworker + * + * Will destroy the sworker thread. This function must not be called until all + * queued events have been completed. + */ +void swork_put(void) +{ + mutex_lock(&worker_mutex); + + glob_worker->refs--; + if (glob_worker->refs > 0) + goto out; + + swork_destroy(glob_worker); + glob_worker = NULL; +out: + mutex_unlock(&worker_mutex); +} +EXPORT_SYMBOL_GPL(swork_put); diff --git a/kernel/timer.c b/kernel/timer.c index 821749913584..631b42ea242d 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -84,6 +84,7 @@ struct tvec_base { unsigned long timer_jiffies; unsigned long next_timer; unsigned long active_timers; + unsigned long all_timers; struct tvec_root tv1; struct tvec tv2; struct tvec tv3; @@ -352,6 +353,20 @@ void set_timer_slack(struct timer_list *timer, int slack_hz) } EXPORT_SYMBOL_GPL(set_timer_slack); +/* + * If the list is empty, catch up ->timer_jiffies to the current time. + * The caller must hold the tvec_base lock. Returns true if the list + * was empty and therefore ->timer_jiffies was updated. + */ +static bool catchup_timer_jiffies(struct tvec_base *base) +{ + if (!base->all_timers) { + base->timer_jiffies = jiffies; + return true; + } + return false; +} + static void __internal_add_timer(struct tvec_base *base, struct timer_list *timer) { @@ -398,6 +413,7 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer) static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) { + (void)catchup_timer_jiffies(base); __internal_add_timer(base, timer); /* * Update base->active_timers and base->next_timer @@ -407,6 +423,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) base->next_timer = timer->expires; base->active_timers++; } + base->all_timers++; } #ifdef CONFIG_TIMER_STATS @@ -686,6 +703,8 @@ detach_expired_timer(struct timer_list *timer, struct tvec_base *base) detach_timer(timer, true); if (!tbase_get_deferrable(timer->base)) base->active_timers--; + base->all_timers--; + (void)catchup_timer_jiffies(base); } static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, @@ -700,6 +719,8 @@ static int detach_if_pending(struct timer_list *timer, struct tvec_base *base, if (timer->expires == base->next_timer) base->next_timer = base->timer_jiffies; } + base->all_timers--; + (void)catchup_timer_jiffies(base); return 1; } @@ -1213,6 +1234,10 @@ static inline void __run_timers(struct tvec_base *base) struct timer_list *timer; spin_lock_irq(&base->lock); + if (catchup_timer_jiffies(base)) { + spin_unlock_irq(&base->lock); + return; + } while (time_after_eq(jiffies, base->timer_jiffies)) { struct list_head work_list; struct list_head *head = &work_list; @@ -1454,6 +1479,8 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __this_cpu_read(tvec_bases); + hrtimer_run_pending(); + #if defined(CONFIG_IRQ_WORK) && defined(CONFIG_PREEMPT_RT_FULL) irq_work_run(); #endif @@ -1467,52 +1494,8 @@ static void run_timer_softirq(struct softirq_action *h) */ void run_local_timers(void) { - struct tvec_base *base = __this_cpu_read(tvec_bases); - hrtimer_run_queues(); - /* - * We can access this lockless as we are in the timer - * interrupt. If there are no timers queued, nothing to do in - * the timer softirq. - */ -#ifdef CONFIG_PREEMPT_RT_FULL - -#ifndef CONFIG_SMP - /* - * The spin_do_trylock() later may fail as the lock may be hold before - * the interrupt arrived. The spin-lock debugging code will raise a - * warning if the try_lock fails on UP. Since this is only an - * optimization for the FULL_NO_HZ case (not to run the timer softirq on - * an nohz_full CPU) we don't really care and shedule the softirq. - */ raise_softirq(TIMER_SOFTIRQ); - return; -#endif - - /* On RT, irq work runs from softirq */ - if (irq_work_needs_cpu()) { - raise_softirq(TIMER_SOFTIRQ); - return; - } - - if (!spin_do_trylock(&base->lock)) { - raise_softirq(TIMER_SOFTIRQ); - return; - } -#endif - - if (!base->active_timers) - goto out; - - /* Check whether the next pending timer has expired */ - if (time_before_eq(base->next_timer, jiffies)) - raise_softirq(TIMER_SOFTIRQ); -out: -#ifdef CONFIG_PREEMPT_RT_FULL - rt_spin_unlock_after_trylock_in_irq(&base->lock); -#endif - /* The ; ensures that gcc won't complain in the !RT case */ - ; } #ifdef __ARCH_WANT_SYS_ALARM @@ -1692,6 +1675,7 @@ static int init_timers_cpu(int cpu) base->timer_jiffies = jiffies; base->next_timer = base->timer_jiffies; base->active_timers = 0; + base->all_timers = 0; return 0; } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1907ebc3a2b8..20ec307d8682 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2929,19 +2929,57 @@ bool flush_work(struct work_struct *work) } EXPORT_SYMBOL_GPL(flush_work); +struct cwt_wait { + wait_queue_t wait; + struct work_struct *work; +}; + +static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key) +{ + struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait); + + if (cwait->work != key) + return 0; + return autoremove_wake_function(wait, mode, sync, key); +} + static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) { + static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq); unsigned long flags; int ret; do { ret = try_to_grab_pending(work, is_dwork, &flags); /* - * If someone else is canceling, wait for the same event it - * would be waiting for before retrying. + * If someone else is already canceling, wait for it to + * finish. flush_work() doesn't work for PREEMPT_NONE + * because we may get scheduled between @work's completion + * and the other canceling task resuming and clearing + * CANCELING - flush_work() will return false immediately + * as @work is no longer busy, try_to_grab_pending() will + * return -ENOENT as @work is still being canceled and the + * other canceling task won't be able to clear CANCELING as + * we're hogging the CPU. + * + * Let's wait for completion using a waitqueue. As this + * may lead to the thundering herd problem, use a custom + * wake function which matches @work along with exclusive + * wait and wakeup. */ - if (unlikely(ret == -ENOENT)) - flush_work(work); + if (unlikely(ret == -ENOENT)) { + struct cwt_wait cwait; + + init_wait(&cwait.wait); + cwait.wait.func = cwt_wakefn; + cwait.work = work; + + prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait, + TASK_UNINTERRUPTIBLE); + if (work_is_canceling(work)) + schedule(); + finish_wait(&cancel_waitq, &cwait.wait); + } } while (unlikely(ret < 0)); /* tell other tasks trying to grab @work to back off */ @@ -2950,6 +2988,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) flush_work(work); clear_work_data(work); + + /* + * Paired with prepare_to_wait() above so that either + * waitqueue_active() is visible here or !work_is_canceling() is + * visible there. + */ + smp_mb(); + if (waitqueue_active(&cancel_waitq)) + __wake_up(&cancel_waitq, TASK_NORMAL, 1, work); + return ret; } diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 19ae2366a7a9..b93a6103fa4d 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -590,6 +590,8 @@ GENERATE_TESTCASE(init_held_rsem) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_spin) +#ifndef CONFIG_PREEMPT_RT_FULL + #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_hard_rlock) @@ -605,9 +607,12 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe1_soft_wlock) +#endif + #undef E1 #undef E2 +#ifndef CONFIG_PREEMPT_RT_FULL /* * Enabling hardirqs with a softirq-safe lock held: */ @@ -640,6 +645,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #undef E1 #undef E2 +#endif + /* * Enabling irqs with an irq-safe lock held: */ @@ -663,6 +670,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2A_rlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_spin) +#ifndef CONFIG_PREEMPT_RT_FULL + #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_hard_rlock) @@ -678,6 +687,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) +#endif + #undef E1 #undef E2 @@ -709,6 +720,8 @@ GENERATE_PERMUTATIONS_2_EVENTS(irqsafe2B_soft_wlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_spin) +#ifndef CONFIG_PREEMPT_RT_FULL + #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_hard_rlock) @@ -724,6 +737,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) +#endif + #undef E1 #undef E2 #undef E3 @@ -757,6 +772,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe3_soft_wlock) #include "locking-selftest-spin-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_spin) +#ifndef CONFIG_PREEMPT_RT_FULL + #include "locking-selftest-rlock-hardirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_hard_rlock) @@ -772,10 +789,14 @@ GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_rlock) #include "locking-selftest-wlock-softirq.h" GENERATE_PERMUTATIONS_3_EVENTS(irqsafe4_soft_wlock) +#endif + #undef E1 #undef E2 #undef E3 +#ifndef CONFIG_PREEMPT_RT_FULL + /* * read-lock / write-lock irq inversion. * @@ -838,6 +859,10 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock) #undef E2 #undef E3 +#endif + +#ifndef CONFIG_PREEMPT_RT_FULL + /* * read-lock / write-lock recursion that is actually safe. */ @@ -876,6 +901,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) #undef E2 #undef E3 +#endif + /* * read-lock / write-lock recursion that is unsafe. */ diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 7a85967060a5..f0f5c5c3de12 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -139,6 +139,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* Error: request to write beyond destination buffer */ if (cpy > oend) goto _output_error; + if ((ref + COPYLENGTH) > oend || + (op + COPYLENGTH) > oend) + goto _output_error; LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); while (op < cpy) *op++ = *ref++; diff --git a/localversion-rt b/localversion-rt index a68b4337d4ce..21988f9ad53f 100644 --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt31 +-rt34 diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fa54408b66b2..be24d2d186fa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2512,14 +2512,17 @@ static void __init memcg_stock_init(void) */ static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages) { - struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); + struct memcg_stock_pcp *stock; + int cpu = get_cpu_light(); + + stock = &per_cpu(memcg_stock, cpu); if (stock->cached != memcg) { /* reset if necessary */ drain_stock(stock); stock->cached = memcg; } stock->nr_pages += nr_pages; - put_cpu_var(memcg_stock); + put_cpu_light(); } /* diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f6f23833de44..c8e450132813 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1016,6 +1016,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start) return NULL; arch_refresh_nodedata(nid, pgdat); + } else { + /* Reset the nr_zones and classzone_idx to 0 before reuse */ + pgdat->nr_zones = 0; + pgdat->classzone_idx = 0; } /* we can use NODE_DATA(nid) from here */ @@ -1863,15 +1867,6 @@ void try_offline_node(int nid) if (is_vmalloc_addr(zone->wait_table)) vfree(zone->wait_table); } - - /* - * Since there is no way to guarentee the address of pgdat/zone is not - * on stack of any kernel threads or used by other kernel objects - * without reference counting or other symchronizing method, do not - * reset node_data and free pgdat here. Just reset it to 0 and reuse - * the memory when the node is online again. - */ - memset(pgdat, 0, sizeof(*pgdat)); } EXPORT_SYMBOL(try_offline_node); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 9f45f87a5859..51d8d15f48d7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -878,8 +878,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi, * bw * elapsed + write_bandwidth * (period - elapsed) * write_bandwidth = --------------------------------------------------- * period + * + * @written may have decreased due to account_page_redirty(). + * Avoid underflowing @bw calculation. */ - bw = written - bdi->written_stamp; + bw = written - min(written, bdi->written_stamp); bw *= HZ; if (unlikely(elapsed > period)) { do_div(bw, elapsed); @@ -943,7 +946,7 @@ static void global_update_bandwidth(unsigned long thresh, unsigned long now) { static DEFINE_SPINLOCK(dirty_lock); - static unsigned long update_time; + static unsigned long update_time = INITIAL_JIFFIES; /* * check locklessly first to optimize away locking for the most time diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index d6be3edb7a43..526bf56f4d31 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -283,7 +283,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, int copylen; ret = -EOPNOTSUPP; - if (m->msg_flags&MSG_OOB) + if (flags & MSG_OOB) goto read_error; skb = skb_recv_datagram(sk, flags, 0 , &ret); diff --git a/net/can/af_can.c b/net/can/af_can.c index a27f8aad9e99..5e9a2272b7a7 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -262,6 +262,9 @@ int can_send(struct sk_buff *skb, int loop) goto inval_skb; } + skb->ip_summed = CHECKSUM_UNNECESSARY; + + skb_reset_mac_header(skb); skb_reset_network_header(skb); skb_reset_transport_header(skb); diff --git a/net/compat.c b/net/compat.c index 275af79c131b..d12529050b29 100644 --- a/net/compat.c +++ b/net/compat.c @@ -71,6 +71,13 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + + if (!tmp1) + kmsg->msg_namelen = 0; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cf9cd13509a7..e731c96eac4b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -25,6 +25,8 @@ static int zero = 0; static int one = 1; static int ushort_max = USHRT_MAX; +static int min_sndbuf = SOCK_MIN_SNDBUF; +static int min_rcvbuf = SOCK_MIN_RCVBUF; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, @@ -223,7 +225,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_max", @@ -231,7 +233,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "wmem_default", @@ -239,7 +241,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_sndbuf, }, { .procname = "rmem_default", @@ -247,7 +249,7 @@ static struct ctl_table net_core_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &one, + .extra1 = &min_rcvbuf, }, { .procname = "dev_weight", diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..4eeba4e497a0 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler( mutex_unlock(&inet_diag_table_mutex); } +static size_t inet_sk_attr_size(void) +{ + return nla_total_size(sizeof(struct tcp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + + nla_total_size(TCP_CA_NAME_MAX) + + nla_total_size(sizeof(struct tcpvegas_info)) + + 64; +} + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, @@ -324,9 +338,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s if (err) goto out; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - sizeof(struct tcp_info) + 64, GFP_KERNEL); + rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { err = -ENOMEM; goto out; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7efa26bb872c..96f64e59d70c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2617,15 +2617,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); @@ -2899,9 +2895,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen; - struct sk_buff *syn_data = NULL, *data; + int syn_loss = 0, space, err = 0; unsigned long last_syn_loss = 0; + struct sk_buff *syn_data; tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, @@ -2932,42 +2928,39 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) /* limit to order-0 allocations */ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER)); - syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space, - sk->sk_allocation); - if (syn_data == NULL) + syn_data = sk_stream_alloc_skb(sk, space, sk->sk_allocation); + if (!syn_data) goto fallback; + syn_data->ip_summed = CHECKSUM_PARTIAL; + memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); + skb_shinfo(syn_data)->gso_segs = 1; + if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), + fo->data->msg_iov, 0, space))) { + kfree_skb(syn_data); + goto fallback; + } - for (i = 0; i < iovlen && syn_data->len < space; ++i) { - struct iovec *iov = &fo->data->msg_iov[i]; - unsigned char __user *from = iov->iov_base; - int len = iov->iov_len; - - if (syn_data->len + len > space) - len = space - syn_data->len; - else if (i + 1 == iovlen) - /* No more data pending in inet_wait_for_connect() */ - fo->data = NULL; + /* No more data pending in inet_wait_for_connect() */ + if (space == fo->size) + fo->data = NULL; + fo->copied = space; - if (skb_add_data(syn_data, from, len)) - goto fallback; - } + tcp_connect_queue_skb(sk, syn_data); - /* Queue a data-only packet after the regular SYN for retransmission */ - data = pskb_copy(syn_data, sk->sk_allocation); - if (data == NULL) - goto fallback; - TCP_SKB_CB(data)->seq++; - TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN; - TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH); - tcp_connect_queue_skb(sk, data); - fo->copied = data->len; + err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) { + /* Now full SYN+DATA was cloned and sent (or not), + * remove the SYN from the original skb (syn_data) + * we keep in write queue in case of a retransmit, as we + * also have the SYN packet (with no data) in the same queue. + */ + TCP_SKB_CB(syn_data)->seq++; + TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; + if (!err) { tp->syn_data = (fo->copied > 0); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); goto done; } - syn_data = NULL; fallback: /* Send a regular SYN with Fast Open cookie request option */ @@ -2976,7 +2969,6 @@ fallback: err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation); if (err) tp->syn_fastopen = 0; - kfree_skb(syn_data); done: fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */ return err; @@ -2996,13 +2988,10 @@ int tcp_connect(struct sock *sk) return 0; } - buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); - if (unlikely(buff == NULL)) + buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); + if (unlikely(!buff)) return -ENOBUFS; - /* Reserve space for headers. */ - skb_reserve(buff, MAX_TCP_HEADER); - tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp; tcp_connect_queue_skb(sk, buff); diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index b4d5e1d97c1b..27ca79682efb 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, goto again; flp6->saddr = saddr; } + err = rt->dst.error; goto out; } again: diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 612a5ddaf93b..799bafc2af39 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -18,28 +18,28 @@ static struct ctl_table llc2_timeout_table[] = { { .procname = "ack", .data = &sysctl_llc2_ack_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_ack_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "busy", .data = &sysctl_llc2_busy_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_busy_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "p", .data = &sysctl_llc2_p_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_p_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "rej", .data = &sysctl_llc2_rej_timeout, - .maxlen = sizeof(long), + .maxlen = sizeof(sysctl_llc2_rej_timeout), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index bf7a1bbb975f..e278c64572de 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -57,13 +57,24 @@ struct ieee80211_local; #define IEEE80211_UNSET_POWER_LEVEL INT_MIN /* - * Some APs experience problems when working with U-APSD. Decrease the - * probability of that happening by using legacy mode for all ACs but VO. - * The AP that caused us trouble was a Cisco 4410N. It ignores our - * setting, and always treats non-VO ACs as legacy. + * Some APs experience problems when working with U-APSD. Decreasing the + * probability of that happening by using legacy mode for all ACs but VO isn't + * enough. + * + * Cisco 4410N originally forced us to enable VO by default only because it + * treated non-VO ACs as legacy. + * + * However some APs (notably Netgear R7000) silently reclassify packets to + * different ACs. Since u-APSD ACs require trigger frames for frame retrieval + * clients would never see some frames (e.g. ARP responses) or would fetch them + * accidentally after a long time. + * + * It makes little sense to enable u-APSD queues by default because it needs + * userspace applications to be aware of it to actually take advantage of the + * possible additional powersavings. Implicitly depending on driver autotrigger + * frame support doesn't make much sense. */ -#define IEEE80211_DEFAULT_UAPSD_QUEUES \ - IEEE80211_WMM_IE_STA_QOSINFO_AC_VO +#define IEEE80211_DEFAULT_UAPSD_QUEUES 0 #define IEEE80211_DEFAULT_MAX_SP_LEN \ IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index d96aea0c05ef..638b48e325f9 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2107,6 +2107,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) hdr = (struct ieee80211_hdr *) skb->data; mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) + return RX_DROP_MONITOR; + /* frame is in RMC, don't forward */ if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 27d3f40de3cd..847d2a2c5d05 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -658,16 +658,24 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } -static int ip_vs_route_me_harder(int af, struct sk_buff *skb) +static int ip_vs_route_me_harder(int af, struct sk_buff *skb, + unsigned int hooknum) { + if (!sysctl_snat_reroute(skb)) + return 0; + /* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */ + if (NF_INET_LOCAL_IN == hooknum) + return 0; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0) + struct dst_entry *dst = skb_dst(skb); + + if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) && + ip6_route_me_harder(skb) != 0) return 1; } else #endif - if ((sysctl_snat_reroute(skb) || - skb_rtable(skb)->rt_flags & RTCF_LOCAL) && + if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) && ip_route_me_harder(skb, RTN_LOCAL) != 0) return 1; @@ -790,7 +798,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb, union nf_inet_addr *snet, __u8 protocol, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - unsigned int offset, unsigned int ihl) + unsigned int offset, unsigned int ihl, + unsigned int hooknum) { unsigned int verdict = NF_DROP; @@ -820,7 +829,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb, #endif ip_vs_nat_icmp(skb, pp, cp, 1); - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto out; /* do the statistics and put it back */ @@ -915,7 +924,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, snet.ip = iph->saddr; return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, - pp, ciph.len, ihl); + pp, ciph.len, ihl, hooknum); } #ifdef CONFIG_IP_VS_IPV6 @@ -980,7 +989,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, snet.in6 = ciph.saddr.in6; writable = ciph.len; return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp, - pp, writable, sizeof(struct ipv6hdr)); + pp, writable, sizeof(struct ipv6hdr), + hooknum); } #endif @@ -1039,7 +1049,8 @@ static inline bool is_new_conn(const struct sk_buff *skb, */ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, - struct ip_vs_conn *cp, struct ip_vs_iphdr *iph) + struct ip_vs_conn *cp, struct ip_vs_iphdr *iph, + unsigned int hooknum) { struct ip_vs_protocol *pp = pd->pp; @@ -1077,7 +1088,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, * if it came from this machine itself. So re-compute * the routing information. */ - if (ip_vs_route_me_harder(af, skb)) + if (ip_vs_route_me_harder(af, skb, hooknum)) goto drop; IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); @@ -1180,7 +1191,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) cp = pp->conn_out_get(af, skb, &iph, 0); if (likely(cp)) - return handle_response(af, skb, pd, cp, &iph); + return handle_response(af, skb, pd, cp, &iph, hooknum); if (sysctl_nat_icmp_send(net) && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP || diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index db801263ee9f..a8027e73b6a2 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -891,6 +891,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param, IP_VS_DBG(2, "BACKUP, add new conn. failed\n"); return; } + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + kfree(param->pe_data); } if (opt) @@ -1164,6 +1166,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end) (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL) ); #endif + ip_vs_pe_put(param.pe); return 0; /* Error exit */ out: diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index ad979612238a..7350723aeb15 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -611,8 +611,12 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match = nft_match->ops.data; if (strcmp(match->name, mt_name) == 0 && - match->revision == rev && match->family == family) + match->revision == rev && match->family == family) { + if (!try_module_get(match->me)) + return ERR_PTR(-ENOENT); + return &nft_match->ops; + } } match = xt_request_find_match(family, mt_name, rev); @@ -682,8 +686,12 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct xt_target *target = nft_target->ops.data; if (strcmp(target->name, tg_name) == 0 && - target->revision == rev && target->family == family) + target->revision == rev && target->family == family) { + if (!try_module_get(target->me)) + return ERR_PTR(-ENOENT); + return &nft_target->ops; + } } target = xt_request_find_target(family, tg_name, rev); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1ba67931eb1b..13332dbf291d 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -243,12 +243,13 @@ static int extract_icmp6_fields(const struct sk_buff *skb, unsigned int outside_hdrlen, int *protocol, - struct in6_addr **raddr, - struct in6_addr **laddr, + const struct in6_addr **raddr, + const struct in6_addr **laddr, __be16 *rport, - __be16 *lport) + __be16 *lport, + struct ipv6hdr *ipv6_var) { - struct ipv6hdr *inside_iph, _inside_iph; + const struct ipv6hdr *inside_iph; struct icmp6hdr *icmph, _icmph; __be16 *ports, _ports[2]; u8 inside_nexthdr; @@ -263,12 +264,14 @@ extract_icmp6_fields(const struct sk_buff *skb, if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK) return 1; - inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph); + inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), + sizeof(*ipv6_var), ipv6_var); if (inside_iph == NULL) return 1; inside_nexthdr = inside_iph->nexthdr; - inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), + inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + + sizeof(*ipv6_var), &inside_nexthdr, &inside_fragoff); if (inside_hdrlen < 0) return 1; /* hjm: Packet has no/incomplete transport layer headers. */ @@ -315,10 +318,10 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, static bool socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) { - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); struct udphdr _hdr, *hp = NULL; struct sock *sk = skb->sk; - struct in6_addr *daddr = NULL, *saddr = NULL; + const struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); int thoff = 0, uninitialized_var(tproto); const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; @@ -342,7 +345,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) } else if (tproto == IPPROTO_ICMPV6) { if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, - &sport, &dport)) + &sport, &dport, &ipv6_var)) return false; } else { return false; diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index a817705ce2d0..dba8d0864f18 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, int *unpinned); static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); -static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) +static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst, + struct rds_iw_device **rds_iwdev, + struct rdma_cm_id **cm_id) { struct rds_iw_device *iwdev; struct rds_iw_cm_id *i_cm_id; @@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd src_addr->sin_port, dst_addr->sin_addr.s_addr, dst_addr->sin_port, - rs->rs_bound_addr, - rs->rs_bound_port, - rs->rs_conn_addr, - rs->rs_conn_port); + src->sin_addr.s_addr, + src->sin_port, + dst->sin_addr.s_addr, + dst->sin_port); #ifdef WORKING_TUPLE_DETECTION - if (src_addr->sin_addr.s_addr == rs->rs_bound_addr && - src_addr->sin_port == rs->rs_bound_port && - dst_addr->sin_addr.s_addr == rs->rs_conn_addr && - dst_addr->sin_port == rs->rs_conn_port) { + if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr && + src_addr->sin_port == src->sin_port && + dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr && + dst_addr->sin_port == dst->sin_port) { #else /* FIXME - needs to compare the local and remote * ipaddr/port tuple, but the ipaddr is the only @@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd * zero'ed. It doesn't appear to be properly populated * during connection setup... */ - if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) { + if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) { #endif spin_unlock_irq(&iwdev->spinlock); *rds_iwdev = iwdev; @@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i { struct sockaddr_in *src_addr, *dst_addr; struct rds_iw_device *rds_iwdev_old; - struct rds_sock rs; struct rdma_cm_id *pcm_id; int rc; src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; - rs.rs_bound_addr = src_addr->sin_addr.s_addr; - rs.rs_bound_port = src_addr->sin_port; - rs.rs_conn_addr = dst_addr->sin_addr.s_addr; - rs.rs_conn_port = dst_addr->sin_port; - - rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id); + rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id); if (rc) rds_iw_remove_cm_id(rds_iwdev, cm_id); @@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, struct rds_iw_device *rds_iwdev; struct rds_iw_mr *ibmr = NULL; struct rdma_cm_id *cm_id; + struct sockaddr_in src = { + .sin_addr.s_addr = rs->rs_bound_addr, + .sin_port = rs->rs_bound_port, + }; + struct sockaddr_in dst = { + .sin_addr.s_addr = rs->rs_conn_addr, + .sin_port = rs->rs_conn_port, + }; int ret; - ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id); + ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id); if (ret || !cm_id) { ret = -ENODEV; goto out; diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index b5cb2aa08f33..35773ad6d23d 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -71,14 +71,14 @@ static struct ctl_table rds_sysctl_rds_table[] = { { .procname = "max_unacked_packets", .data = &rds_sysctl_max_unacked_packets, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_unacked_bytes", .data = &rds_sysctl_max_unacked_bytes, - .maxlen = sizeof(unsigned long), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 34b5490dde65..4949f753686c 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) { /* nothing remains on the queue */ if (copied && - (msg->msg_flags & MSG_PEEK || timeo == 0)) + (flags & MSG_PEEK || timeo == 0)) goto out; /* wait for a message to turn up */ diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index a4acaf2bcf18..22ca25818120 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -349,9 +349,9 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (!svc_xprt_has_something_to_do(xprt)) return; - cpu = get_cpu(); + cpu = get_cpu_light(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); - put_cpu(); + put_cpu_light(); spin_lock_bh(&pool->sp_lock); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 18d73df72531..c260243dbe07 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4190,6 +4190,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (parse_station_flags(info, dev->ieee80211_ptr->iftype, ¶ms)) return -EINVAL; + /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT + * as userspace might just pass through the capabilities from the IEs + * directly, rather than enforcing this restriction and returning an + * error in this case. + */ + if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) { + params.ht_capa = NULL; + params.vht_capa = NULL; + } + /* When you run into this, adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7); diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 3bab89eb21d6..e90ab0e20db8 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -137,7 +137,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry, int error; int size; - if (!inode->i_op || !inode->i_op->getxattr) + if (!inode->i_op->getxattr) return -EOPNOTSUPP; desc = init_desc(type); if (IS_ERR(desc)) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 7e71e066198f..690cd632bd5b 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -62,7 +62,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry) int error; int count = 0; - if (!inode->i_op || !inode->i_op->getxattr) + if (!inode->i_op->getxattr) return -EOPNOTSUPP; for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) { diff --git a/security/security.c b/security/security.c index 919cad93ac82..8b774f362a3d 100644 --- a/security/security.c +++ b/security/security.c @@ -433,11 +433,20 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir, } int security_path_rename(struct path *old_dir, struct dentry *old_dentry, - struct path *new_dir, struct dentry *new_dentry) + struct path *new_dir, struct dentry *new_dentry, + unsigned int flags) { if (unlikely(IS_PRIVATE(old_dentry->d_inode) || (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) return 0; + + if (flags & RENAME_EXCHANGE) { + int err = security_ops->path_rename(new_dir, new_dentry, + old_dir, old_dentry); + if (err) + return err; + } + return security_ops->path_rename(old_dir, old_dentry, new_dir, new_dentry); } @@ -524,11 +533,20 @@ int security_inode_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, } int security_inode_rename(struct inode *old_dir, struct dentry *old_dentry, - struct inode *new_dir, struct dentry *new_dentry) + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) { if (unlikely(IS_PRIVATE(old_dentry->d_inode) || (new_dentry->d_inode && IS_PRIVATE(new_dentry->d_inode)))) return 0; + + if (flags & RENAME_EXCHANGE) { + int err = security_ops->inode_rename(new_dir, new_dentry, + old_dir, old_dentry); + if (err) + return err; + } + return security_ops->inode_rename(old_dir, old_dentry, new_dir, new_dentry); } diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index d60c0ee66387..6c4cbd97a673 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -152,7 +152,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf, goto out; /* No partial writes. */ - length = EINVAL; + length = -EINVAL; if (*ppos != 0) goto out; diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 80a09c37cac8..bed745c8b1a3 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -173,7 +173,7 @@ static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, * Use filesystem name if filesystem does not support rename() * operation. */ - if (inode->i_op && !inode->i_op->rename) + if (!inode->i_op->rename && !inode->i_op->rename2) goto prepend_filesystem_name; } /* Prepend device name. */ @@ -282,7 +282,8 @@ char *tomoyo_realpath_from_path(struct path *path) * Get local name for filesystems without rename() operation * or dentry without vfsmount. */ - if (!path->mnt || (inode->i_op && !inode->i_op->rename)) + if (!path->mnt || + (!inode->i_op->rename && !inode->i_op->rename2)) pos = tomoyo_get_local_path(path->dentry, buf, buf_len - 1); /* Get absolute name for the rest. */ diff --git a/sound/core/control.c b/sound/core/control.c index 98a29b26c5f4..f2082a35b890 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -1168,6 +1168,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file, if (info->count < 1) return -EINVAL; + if (!*info->id.name) + return -EINVAL; + if (strnlen(info->id.name, sizeof(info->id.name)) >= sizeof(info->id.name)) + return -EINVAL; access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE : (info->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE| SNDRV_CTL_ELEM_ACCESS_INACTIVE| diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index d9a09bdd09db..9a23bdea97d8 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -653,12 +653,45 @@ static int get_amp_val_to_activate(struct hda_codec *codec, hda_nid_t nid, return val; } +/* is this a stereo widget or a stereo-to-mono mix? */ +static bool is_stereo_amps(struct hda_codec *codec, hda_nid_t nid, int dir) +{ + unsigned int wcaps = get_wcaps(codec, nid); + hda_nid_t conn; + + if (wcaps & AC_WCAP_STEREO) + return true; + if (dir != HDA_INPUT || get_wcaps_type(wcaps) != AC_WID_AUD_MIX) + return false; + if (snd_hda_get_num_conns(codec, nid) != 1) + return false; + if (snd_hda_get_connections(codec, nid, &conn, 1) < 0) + return false; + return !!(get_wcaps(codec, conn) & AC_WCAP_STEREO); +} + /* initialize the amp value (only at the first time) */ static void init_amp(struct hda_codec *codec, hda_nid_t nid, int dir, int idx) { unsigned int caps = query_amp_caps(codec, nid, dir); int val = get_amp_val_to_activate(codec, nid, dir, caps, false); - snd_hda_codec_amp_init_stereo(codec, nid, dir, idx, 0xff, val); + + if (is_stereo_amps(codec, nid, dir)) + snd_hda_codec_amp_init_stereo(codec, nid, dir, idx, 0xff, val); + else + snd_hda_codec_amp_init(codec, nid, 0, dir, idx, 0xff, val); +} + +/* update the amp, doing in stereo or mono depending on NID */ +static int update_amp(struct hda_codec *codec, hda_nid_t nid, int dir, int idx, + unsigned int mask, unsigned int val) +{ + if (is_stereo_amps(codec, nid, dir)) + return snd_hda_codec_amp_stereo(codec, nid, dir, idx, + mask, val); + else + return snd_hda_codec_amp_update(codec, nid, 0, dir, idx, + mask, val); } /* calculate amp value mask we can modify; @@ -698,7 +731,7 @@ static void activate_amp(struct hda_codec *codec, hda_nid_t nid, int dir, return; val &= mask; - snd_hda_codec_amp_stereo(codec, nid, dir, idx, mask, val); + update_amp(codec, nid, dir, idx, mask, val); } static void activate_amp_out(struct hda_codec *codec, struct nid_path *path, @@ -4337,13 +4370,11 @@ static void mute_all_mixer_nid(struct hda_codec *codec, hda_nid_t mix) has_amp = nid_has_mute(codec, mix, HDA_INPUT); for (i = 0; i < nums; i++) { if (has_amp) - snd_hda_codec_amp_stereo(codec, mix, - HDA_INPUT, i, - 0xff, HDA_AMP_MUTE); + update_amp(codec, mix, HDA_INPUT, i, + 0xff, HDA_AMP_MUTE); else if (nid_has_volume(codec, conn[i], HDA_OUTPUT)) - snd_hda_codec_amp_stereo(codec, conn[i], - HDA_OUTPUT, 0, - 0xff, HDA_AMP_MUTE); + update_amp(codec, conn[i], HDA_OUTPUT, 0, + 0xff, HDA_AMP_MUTE); } } diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 2f3059b50ffa..84e8879cc372 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -959,7 +959,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus, } } - if (!bus->no_response_fallback) + if (bus->no_response_fallback) return -1; if (!chip->polling_mode && chip->poll_count < 2) { diff --git a/sound/pci/hda/hda_proc.c b/sound/pci/hda/hda_proc.c index ce5a6da83419..05e19f78b4cb 100644 --- a/sound/pci/hda/hda_proc.c +++ b/sound/pci/hda/hda_proc.c @@ -134,13 +134,38 @@ static void print_amp_caps(struct snd_info_buffer *buffer, (caps & AC_AMPCAP_MUTE) >> AC_AMPCAP_MUTE_SHIFT); } +/* is this a stereo widget or a stereo-to-mono mix? */ +static bool is_stereo_amps(struct hda_codec *codec, hda_nid_t nid, + int dir, unsigned int wcaps, int indices) +{ + hda_nid_t conn; + + if (wcaps & AC_WCAP_STEREO) + return true; + /* check for a stereo-to-mono mix; it must be: + * only a single connection, only for input, and only a mixer widget + */ + if (indices != 1 || dir != HDA_INPUT || + get_wcaps_type(wcaps) != AC_WID_AUD_MIX) + return false; + + if (snd_hda_get_raw_connections(codec, nid, &conn, 1) < 0) + return false; + /* the connection source is a stereo? */ + wcaps = snd_hda_param_read(codec, conn, AC_PAR_AUDIO_WIDGET_CAP); + return !!(wcaps & AC_WCAP_STEREO); +} + static void print_amp_vals(struct snd_info_buffer *buffer, struct hda_codec *codec, hda_nid_t nid, - int dir, int stereo, int indices) + int dir, unsigned int wcaps, int indices) { unsigned int val; + bool stereo; int i; + stereo = is_stereo_amps(codec, nid, dir, wcaps, indices); + dir = dir == HDA_OUTPUT ? AC_AMP_GET_OUTPUT : AC_AMP_GET_INPUT; for (i = 0; i < indices; i++) { snd_iprintf(buffer, " ["); @@ -757,12 +782,10 @@ static void print_codec_info(struct snd_info_entry *entry, (codec->single_adc_amp && wid_type == AC_WID_AUD_IN)) print_amp_vals(buffer, codec, nid, HDA_INPUT, - wid_caps & AC_WCAP_STEREO, - 1); + wid_caps, 1); else print_amp_vals(buffer, codec, nid, HDA_INPUT, - wid_caps & AC_WCAP_STEREO, - conn_len); + wid_caps, conn_len); } if (wid_caps & AC_WCAP_OUT_AMP) { snd_iprintf(buffer, " Amp-Out caps: "); @@ -771,11 +794,10 @@ static void print_codec_info(struct snd_info_entry *entry, if (wid_type == AC_WID_PIN && codec->pin_amp_workaround) print_amp_vals(buffer, codec, nid, HDA_OUTPUT, - wid_caps & AC_WCAP_STEREO, - conn_len); + wid_caps, conn_len); else print_amp_vals(buffer, codec, nid, HDA_OUTPUT, - wid_caps & AC_WCAP_STEREO, 1); + wid_caps, 1); } switch (wid_type) { diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index fc492ac24caa..51e208022cc8 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -396,6 +396,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), + SND_PCI_QUIRK(0x106b, 0x5600, "MacBookAir 5,2", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x5b00, "MacBookAir 4,2", CS420X_MBA42), SND_PCI_QUIRK_VENDOR(0x106b, "Apple", CS420X_APPLE), {} /* terminator */ @@ -587,6 +588,7 @@ static int patch_cs420x(struct hda_codec *codec) return -ENOMEM; spec->gen.automute_hook = cs_automute; + codec->single_adc_amp = 1; snd_hda_pick_fixup(codec, cs420x_models, cs420x_fixup_tbl, cs420x_fixups); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index ffc19464b978..976493c4a695 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3232,6 +3232,7 @@ enum { CXT_PINCFG_LENOVO_TP410, CXT_PINCFG_LEMOTE_A1004, CXT_PINCFG_LEMOTE_A1205, + CXT_PINCFG_COMPAQ_CQ60, CXT_FIXUP_STEREO_DMIC, CXT_FIXUP_INC_MIC_BOOST, CXT_FIXUP_HEADPHONE_MIC_PIN, @@ -3368,6 +3369,15 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_PINS, .v.pins = cxt_pincfg_lemote, }, + [CXT_PINCFG_COMPAQ_CQ60] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + /* 0x17 was falsely set up as a mic, it should 0x1d */ + { 0x17, 0x400001f0 }, + { 0x1d, 0x97a70120 }, + { } + } + }, [CXT_FIXUP_STEREO_DMIC] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_stereo_dmic, @@ -3411,6 +3421,7 @@ static const struct hda_fixup cxt_fixups[] = { }; static const struct snd_pci_quirk cxt5051_fixups[] = { + SND_PCI_QUIRK(0x103c, 0x360b, "Compaq CQ60", CXT_PINCFG_COMPAQ_CQ60), SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo X200", CXT_PINCFG_LENOVO_X200), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 910f2dbe1b24..ca26373ebe70 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -271,7 +271,7 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on) { /* We currently only handle front, HP */ static hda_nid_t pins[] = { - 0x0f, 0x10, 0x14, 0x15, 0 + 0x0f, 0x10, 0x14, 0x15, 0x17, 0 }; hda_nid_t *p; for (p = pins; *p; p++) @@ -2885,6 +2885,8 @@ static void alc283_init(struct hda_codec *codec) if (!hp_pin) return; + + msleep(30); hp_pin_sense = snd_hda_jack_detect(codec, hp_pin); /* Index 0x43 Direct Drive HP AMP LPM Control 1 */ @@ -3951,6 +3953,7 @@ enum { ALC269_FIXUP_QUANTA_MUTE, ALC269_FIXUP_LIFEBOOK, ALC269_FIXUP_LIFEBOOK_EXTMIC, + ALC269_FIXUP_LIFEBOOK_HP_PIN, ALC269_FIXUP_AMIC, ALC269_FIXUP_DMIC, ALC269VB_FIXUP_AMIC, @@ -4085,6 +4088,13 @@ static const struct hda_fixup alc269_fixups[] = { { } }, }, + [ALC269_FIXUP_LIFEBOOK_HP_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x21, 0x0221102f }, /* HP out */ + { } + }, + }, [ALC269_FIXUP_AMIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { @@ -4538,6 +4548,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ), SND_PCI_QUIRK_VENDOR(0x104d, "Sony VAIO", ALC269_FIXUP_SONY_VAIO), SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK), + SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), diff --git a/sound/soc/codecs/adav80x.c b/sound/soc/codecs/adav80x.c index f78b27a7c461..23454e9a5d3a 100644 --- a/sound/soc/codecs/adav80x.c +++ b/sound/soc/codecs/adav80x.c @@ -319,7 +319,7 @@ static int adav80x_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct adav80x *adav80x = snd_soc_codec_get_drvdata(codec); - unsigned int deemph = ucontrol->value.enumerated.item[0]; + unsigned int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; @@ -335,7 +335,7 @@ static int adav80x_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct adav80x *adav80x = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = adav80x->deemph; + ucontrol->value.integer.value[0] = adav80x->deemph; return 0; }; diff --git a/sound/soc/codecs/ak4641.c b/sound/soc/codecs/ak4641.c index 94cbe508dd37..d7184726f8a0 100644 --- a/sound/soc/codecs/ak4641.c +++ b/sound/soc/codecs/ak4641.c @@ -76,7 +76,7 @@ static int ak4641_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct ak4641_priv *ak4641 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; @@ -92,7 +92,7 @@ static int ak4641_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct ak4641_priv *ak4641 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = ak4641->deemph; + ucontrol->value.integer.value[0] = ak4641->deemph; return 0; }; diff --git a/sound/soc/codecs/cs4271.c b/sound/soc/codecs/cs4271.c index ce05fd93dc74..a0ad41ac5574 100644 --- a/sound/soc/codecs/cs4271.c +++ b/sound/soc/codecs/cs4271.c @@ -288,7 +288,7 @@ static int cs4271_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = cs4271->deemph; + ucontrol->value.integer.value[0] = cs4271->deemph; return 0; } @@ -298,7 +298,7 @@ static int cs4271_put_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec); - cs4271->deemph = ucontrol->value.enumerated.item[0]; + cs4271->deemph = ucontrol->value.integer.value[0]; return cs4271_set_deemph(codec); } diff --git a/sound/soc/codecs/pcm1681.c b/sound/soc/codecs/pcm1681.c index 73f9c3630e2c..651e2fe2c31f 100644 --- a/sound/soc/codecs/pcm1681.c +++ b/sound/soc/codecs/pcm1681.c @@ -118,7 +118,7 @@ static int pcm1681_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct pcm1681_private *priv = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = priv->deemph; + ucontrol->value.integer.value[0] = priv->deemph; return 0; } @@ -129,7 +129,7 @@ static int pcm1681_put_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct pcm1681_private *priv = snd_soc_codec_get_drvdata(codec); - priv->deemph = ucontrol->value.enumerated.item[0]; + priv->deemph = ucontrol->value.integer.value[0]; return pcm1681_set_deemph(codec); } diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index 715589ff0eda..e93c36fd3073 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -1198,13 +1198,7 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec) /* Enable VDDC charge pump */ ana_pwr |= SGTL5000_VDDC_CHRGPMP_POWERUP; } else if (vddio >= 3100 && vdda >= 3100) { - /* - * if vddio and vddd > 3.1v, - * charge pump should be clean before set ana_pwr - */ - snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER, - SGTL5000_VDDC_CHRGPMP_POWERUP, 0); - + ana_pwr &= ~SGTL5000_VDDC_CHRGPMP_POWERUP; /* VDDC use VDDIO rail */ lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD; lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO << diff --git a/sound/soc/codecs/tas5086.c b/sound/soc/codecs/tas5086.c index a895a5e4bdf2..c6c65001457d 100644 --- a/sound/soc/codecs/tas5086.c +++ b/sound/soc/codecs/tas5086.c @@ -275,7 +275,7 @@ static int tas5086_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct tas5086_private *priv = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = priv->deemph; + ucontrol->value.integer.value[0] = priv->deemph; return 0; } @@ -286,7 +286,7 @@ static int tas5086_put_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct tas5086_private *priv = snd_soc_codec_get_drvdata(codec); - priv->deemph = ucontrol->value.enumerated.item[0]; + priv->deemph = ucontrol->value.integer.value[0]; return tas5086_set_deemph(codec); } diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index 8ae50274ea8f..1a9f4574b65b 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -610,7 +610,7 @@ static int wm2000_anc_mode_get(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - ucontrol->value.enumerated.item[0] = wm2000->anc_active; + ucontrol->value.integer.value[0] = wm2000->anc_active; return 0; } @@ -620,7 +620,7 @@ static int wm2000_anc_mode_put(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - int anc_active = ucontrol->value.enumerated.item[0]; + int anc_active = ucontrol->value.integer.value[0]; int ret; if (anc_active > 1) @@ -643,7 +643,7 @@ static int wm2000_speaker_get(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - ucontrol->value.enumerated.item[0] = wm2000->spk_ena; + ucontrol->value.integer.value[0] = wm2000->spk_ena; return 0; } @@ -653,7 +653,7 @@ static int wm2000_speaker_put(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev); - int val = ucontrol->value.enumerated.item[0]; + int val = ucontrol->value.integer.value[0]; int ret; if (val > 1) diff --git a/sound/soc/codecs/wm8731.c b/sound/soc/codecs/wm8731.c index 029720366ff8..e593722574de 100644 --- a/sound/soc/codecs/wm8731.c +++ b/sound/soc/codecs/wm8731.c @@ -122,7 +122,7 @@ static int wm8731_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8731_priv *wm8731 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8731->deemph; + ucontrol->value.integer.value[0] = wm8731->deemph; return 0; } @@ -132,7 +132,7 @@ static int wm8731_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8731_priv *wm8731 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; int ret = 0; if (deemph > 1) diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index eebcb1da3b7b..ae7d76efe063 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -442,7 +442,7 @@ static int wm8903_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8903->deemph; + ucontrol->value.integer.value[0] = wm8903->deemph; return 0; } @@ -452,7 +452,7 @@ static int wm8903_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; int ret = 0; if (deemph > 1) diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c index 53bbfac6a83a..66cb9e95b5eb 100644 --- a/sound/soc/codecs/wm8904.c +++ b/sound/soc/codecs/wm8904.c @@ -523,7 +523,7 @@ static int wm8904_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8904->deemph; + ucontrol->value.integer.value[0] = wm8904->deemph; return 0; } @@ -532,7 +532,7 @@ static int wm8904_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; diff --git a/sound/soc/codecs/wm8955.c b/sound/soc/codecs/wm8955.c index 82c8ba975720..1c1fc6119758 100644 --- a/sound/soc/codecs/wm8955.c +++ b/sound/soc/codecs/wm8955.c @@ -393,7 +393,7 @@ static int wm8955_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8955->deemph; + ucontrol->value.integer.value[0] = wm8955->deemph; return 0; } @@ -402,7 +402,7 @@ static int wm8955_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 942ef8427347..2a0bfb848512 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -181,7 +181,7 @@ static int wm8960_get_deemph(struct snd_kcontrol *kcontrol, struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec); - ucontrol->value.enumerated.item[0] = wm8960->deemph; + ucontrol->value.integer.value[0] = wm8960->deemph; return 0; } @@ -190,7 +190,7 @@ static int wm8960_put_deemph(struct snd_kcontrol *kcontrol, { struct snd_soc_codec *codec = snd_kcontrol_chip(kcontrol); struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec); - int deemph = ucontrol->value.enumerated.item[0]; + int deemph = ucontrol->value.integer.value[0]; if (deemph > 1) return -EINVAL; diff --git a/sound/soc/jz4740/Makefile b/sound/soc/jz4740/Makefile index be873c1b0c20..d32c540555c4 100644 --- a/sound/soc/jz4740/Makefile +++ b/sound/soc/jz4740/Makefile @@ -1,10 +1,8 @@ # # Jz4740 Platform Support # -snd-soc-jz4740-objs := jz4740-pcm.o snd-soc-jz4740-i2s-objs := jz4740-i2s.o -obj-$(CONFIG_SND_JZ4740_SOC) += snd-soc-jz4740.o obj-$(CONFIG_SND_JZ4740_SOC_I2S) += snd-soc-jz4740-i2s.o # Jz4740 Machine Support diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 5a723df670b4..a82ec53b8fb3 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -178,6 +178,7 @@ static const struct rc_config { { USB_ID(0x041e, 0x3040), 2, 2, 6, 6, 2, 0x6e91 }, /* Live! 24-bit */ { USB_ID(0x041e, 0x3042), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 */ { USB_ID(0x041e, 0x30df), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ + { USB_ID(0x041e, 0x3237), 0, 1, 1, 1, 1, 0x000d }, /* Usb X-Fi S51 Pro */ { USB_ID(0x041e, 0x3048), 2, 2, 6, 6, 2, 0x6e91 }, /* Toshiba SB0500 */ }; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 83bddbdb90e9..5293b5ac8b9d 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -1773,6 +1773,36 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + USB_DEVICE(0x0582, 0x0159), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + /* .vendor_name = "Roland", */ + /* .product_name = "UA-22", */ + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = & (const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, /* this catches most recent vendor-specific Roland devices */ { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | |